diff --git a/.gitattributes b/.gitattributes
index ecbd6733767fe5cd6acda87d23875bdce16b0fa0..6c1045b3720c5e6aa3f8a3ae41e82a5d81326097 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -38,3 +38,13 @@ backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pd
 backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
 backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
 backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..63d60c15c02ba592a06fc67e09c654d568891054
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9569dff52f023f43117ca926bbde3e1f14003fdd
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0b28f2bcdbfef92df0153ebf03faaa2bc73158a1
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c9314eb31f87ff6f0f44cbcf948b2c4224a9eafa
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fa91240b87d388c80f808d8a78858fc60e197ed5
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..201c1ff4ff577f8ff9d247b699a0118d13adb728
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6a34d7ea66b7574d48f980820ae8fd055632c014
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8f7cc5a1f6a8314a0fb47ec38587b39870114639
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..423e5ab89a334fa4ddb0234345c578eee20851cd
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4a5acd993bcf0a0f439319aef297c4eaf9ec2b15
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..70fa415758e8d88826ce9466a1533dbf91cbcf95
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1bba6ba8d60d7c73b4a8f81a30d3bfafbcc6c1bf
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..864c3a0fc0ada3b8ae6d5c81edc5d12586d3123e
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8a34730f54add9830465c52e42fbfc9536b95a29
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..de7481364454b43fb87a1655b09d949110b25c5c
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..34eecf5b883e5b9c6f750e1e747313b6202c5291
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..002f85af4093414f60b6e37e5edc14b204758ac1
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b3f476c95ea36a4bf987b132a41393c5d09ef19c
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b505369a59c4e6956fc3222dea1d31be4a831ff8
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8a678cdbd0c0195b00d3315750ec658810a0bfaa
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6c339820bdc37d7f054932b6d74615188021197d
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c19e8416b340d0c95e10fd3390f640a07f7184f5
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d7cf973f734fd70cea269c6a60dc0093e29bbc04
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b8d051a83ac473fc18d14c10b8226dc414381d9c
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c4d48bc7e63e6428984d0bc2129742f2f7dbc262
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7555c91df9a6110009920a7b5ac22c155cc59cfe
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..820f13ab195e62fe83b6a5c8d8086ef3ffb62b28
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..62551ec728f7b1283c495996b80d72abe2302686
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3a2cda8b9fa5b1093cd4327dcc407bff408a00e6
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e270b46619490eec7d96e25950138a4e96238d6a
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..65ca7f9d880f365ced5096533819011ad152b1be
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c9d59a3f0a2f985360f987d74142d74c3ad8ce9b
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..87364862dc54a01ff7835edf337d948ef7aff565
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..73c09505c83eae12bc26b6b16f8e4239aa5914d8
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ff51f66d82ad89fac2bd3d3340d30d2d2d5c1885
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a9391a3c8b96f98d601212fe5fee01d56fd73b2f
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..756953c9ca6e0fc91efe05cd28cd8d01f18c1700
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fdc8083bf18fbabbcdc25a93b1f095f6a276a544
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5c6bbfc78122b62b4c6f8bb4abae8adc706e30ad
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c8527b0a6296d1a970dca3691f3d31ea3dfa281d
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0d9d74565b11b912a681a1871360ed0bd2385ff9
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..57f86183de530604294a363c8387c1c8b49e93af
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..de9accc5c444a46a8639dc63b3d2841b6b1fdb28
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a600901cb9f10422e2ce532cb8c77ac03dc57959
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..29e6ac298128de00d5eafc256f3cefc35eb26585
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..dae41f38476b77921cdb2f030d11a1f32076622b
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3741fe96fbe15b96ea12feca4e9fa98e58b4b141
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ee94d104a9e0878da2aa2e588adeb888ff12355
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bc15339112fa0ff01c5615b311bebee685e3c089
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..38f4cd642822a36efd860f20971030093b467b26
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c08068e50a1b52db2346ed7d91f994822ecb308a
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e3d37e0af57206d3a8ace41cd93cac2d92a99aad
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..39fac1ab1f5ba0ef8be9166ab400b0303dab3c55
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2342e4a5a58d9938a847f0bf11ea87de5900dadf
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d90465e8281858d7557440ae000a2d8030b5f1be
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ca08a4041f2660b9eef0f6db53a672d88bfaa52e
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..18d258a9eff34e7bbbcdebcd0462250746352d21
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..739efd5c2d9c526fac569d14c81206c02a677755
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_CLIP_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_CLIP_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..dbc433e0a1e0d5b37361a96e3970c0d720639db4
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_CLIP_pdvc.yml
@@ -0,0 +1,17 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['CLIP']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_UniVL_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d4e2a056537258e7c06849d22d4c26c7b25e223f
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_UniVL_pdvc.yml
@@ -0,0 +1,17 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['UniVL']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..72849bd96ab440b568774e6ee8a57f6ed6788162
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml
@@ -0,0 +1,27 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['CLIP']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+use_pseudo_box: 1
+pseudo_box_aug: 0
+pseudo_box_aug_num: 5
+pseudo_box_aug_ratio: 0.1
+pseudo_box_type: similarity
+use_anchor: 1
+pretrained_language_model: CLIP
+disable_contrastive_projection: 1
+
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fbcd022d8623d2fa4e95c31b1f8f6adef8076c1f
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml
@@ -0,0 +1,27 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['UniVL']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+use_pseudo_box: 1
+pseudo_box_aug: 0
+pseudo_box_aug_num: 5
+pseudo_box_aug_ratio: 0.1
+pseudo_box_type: similarity
+use_anchor: 1
+pretrained_language_model: UniVL
+disable_contrastive_projection: 1
+
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa5891c2d59ff2e88a6ccbca706f7ca15f539976 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d3fff7669479f43563fb8f80dc98091b835cd49 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d00afb6f6a5f3d979ee0b513299460ed59528d71 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + 
+use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b56d4945517c6bd61351fcbef05b48c9b7448d25 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ee0f7cd39031aac12a49fa3febbad874ee84eb3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ee646dbe8a44dc5bd827e2ff354f954306512e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcc52404af176f94a0d3cecc3fcff26900f73e07 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c22d10c1f33975e94f1777fc6529b90feb81ba71 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..5a4ee8540f6cbd301e407c8fd4795518a729b2a7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e5cc7747b0917df6ff67aa7af2dcc961853d3643 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8ec8379c8cfae0384ed5c5a23cdc8ba28b250d8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 
+cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7174b3bc97912d16e2db4f3948daf1d5ccca79d2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..047111607ce477a768f570d0c1bdbe1809fd2b27 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ec22f2c3149a6bc2a839c8b7a1e7f95f799a740b --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcd7b440c4ff76e8b2cfa3324524c6a0fb7f8b3e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ceeeab0f8eddaef985f7f96f30092bf3be1c477 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml new file 
mode 100644 index 0000000000000000000000000000000000000000..b9514fabfbb681385913ea0ab7edaedaaa62b628 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..521dc511180da8f9b94569e4ab0a45844266d973 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c59ddd576b3753f4db9a137e8d3cb80dd233e0a4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbe60a5936fe763099137039a222cd0563ebaea9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9960dcaa94683c69f1c63bd6fadef7b17315ebea --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e57a1150b7f8eb19b75d32729945101e3f63970a --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a639c02411b28e7b61485c8306201526a98bb30 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09300cdacf1b21dae6e6d983cc53c0c3674f146c --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d933f4e367996fcd355585308151316fde844160 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9289c03643143a572b64987cf8b733118531b7a2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ecd4bd64bff3f0c9ec4c9a532146ccd657edc907 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 
+refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d228dc7e9de40c84d91a90b4e9e3accd41af0d7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6fe87609b78a2e77ad203ca2136882ba13568493 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d21448627979c6757d3299faee2ea5ab4d2d1b09 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc8c5a5dc77dae477e0f880e515993a46357a8c8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..665f42194576bed58d657628d916710efa51b514 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 
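(A note on the *_refine.yml variants above: each differs from its non-refine twin only by flipping pseudo_box_aug and refine_pseudo_box to 1. The augmentation code is not in this diff; the sketch below is an assumption about what pseudo_box_aug_num and pseudo_box_aug_ratio plausibly control in random_range mode, and augment_pseudo_box is a hypothetical helper, not the repo's API.)

# Hypothetical sketch: uniform jitter of (center, width) temporal pseudo boxes.
import random

def augment_pseudo_box(center, width, aug_num=5, aug_ratio=0.3):
    """Return aug_num randomly jittered copies of one temporal pseudo box."""
    boxes = []
    for _ in range(aug_num):
        c = center + random.uniform(-aug_ratio, aug_ratio) * width  # shift center
        w = width * (1.0 + random.uniform(-aug_ratio, aug_ratio))   # rescale width
        boxes.append((c, max(w, 1e-4)))  # keep widths strictly positive
    return boxes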
diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc8f3897e2e398bf6cedf9c02936553098f2c73 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ac2b451a19de13876e1d0dd042878289fdaa195 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..935b0ececa15dcf4658c1e10a5ae52b93079b0fc --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc.yml @@ -0,0 +1,11 @@ +id: anet_c3d_pdvc # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..e0db6b87acea5ffa66e35e868e44194a04c39852 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: anet_c3d_pdvc_gt +base_cfg_path:
cfgs_base/anet/anet_c3d_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..828311fc71fcc95e9b1a08506d11bb6ab602b665 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl.yml @@ -0,0 +1,53 @@ +id: anet_c3d_pdvcl + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..02b38b6f2dbbb53b838d9bfbab8cf268a7c02c62 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml @@ -0,0 +1,55 @@ +id: anet_c3d_pdvcl_gt + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.00001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 
+transformer_input_type: gt_proposals \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_props.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_props.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d2aa20fce1241e60ad77a69980acf1e3b653ef1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_props.yml @@ -0,0 +1,51 @@ +id: anet_c3d_props +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +train_proposal_sample_num: 15 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: none +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 0 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..147d726179a848dabb0367b22575fa2f20de4097 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc +base_cfg_path: cfgs_base/anet_c3d_pdvc.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d/', 'data/anet/features/i3d/', 'data/anet/features/vggish/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a6991e551815ec0ac234c30ab3a6d09f1bd75cf --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc_gt +base_cfg_path: cfgs_base/anet_c3d_pdvc_gt.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/vggish_npy/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..f30c2ab5a626538f4dbc2c1a1bc497196ff46f24 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d3b50035fe6a95bb2a5790f8b3611be54fc0fa7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..270ae993e3f72bc2d9091b4809e8715fc6c86dae --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7340a306c59e2ac685e8c59ac2960fde366e9c7b --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..16f3082ecb947291ccb4f2226312fcf3fa06d349 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c167a9b0499503b9eff84d0c1ea1aa42453cf117 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 
+top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c48adf788535e24daf8e7ffe16f2e60009118f1f --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..65d5217788315daae5e6bbf002eb746b010e2bde --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9dc19b7dcbc2f364b03abc0014d17eb6375b4a99 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042d3f885aff7b261f66fa03dad252aedbf2fcf9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f6e44731cf2c826b91b8173148e120b64d04f66 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e0f151d036988969e902c436085fa850bb50a4d --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e44527dea994822b026b814df9b354aff082b53 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..08f52e1ac3b1ebca5e2d62c639fd5f6b5752ddf9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bd202e1359b3d756d903c93acf07e4dad268323e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3fe14dc05390932f58a3f5ce8b3ffa3828296200 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..211b0adc17f02c1b64ce3ceff2c0122c7581eada --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..674edd157c6ffe26bd7e9248faffc1a68a997d35 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..20243b5ac8be187c91a7e54eb86cc27db6f21559 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cb675d9549b34cf0ee2258d3e6a107273d1e4ffd --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..586821983af378162d355b298b2788e0c651e0e6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..42476cbf72bb3d8b304e89192a30bda1606046aa --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0a8e97b44c541a99afc965764187cf264bd4268 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9e0ce1a0db8dd453b811f6f0d5609f8ae7648a6d --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8725e365b44b967e794cdc16423a571c71e33bd --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2fd6450a47d8392d35b67b997ec6173f35b6ee4b --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5cd314fb9fd20fa05b1b5417604b40d115ee008e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..23e3dae52ca63a879083d7eafc8c1ab7e1556d71 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4a108d38d2c512864344770d5943e439eb151d5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml new file mode 
100644 index 0000000000000000000000000000000000000000..c30ad37380e6b9ef2b845061471ab2a4ff293d91 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b140662d119689dca4e409f85da91e882323bc0f --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5e03028433a6bbc1c25ae936d791096e8e7826b2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + 
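# NOTE (annotation, not a line of the original diff): across the config pairs in
# this backup, each `*_refine.yml` appears to differ from its non-refine twin only
# by flipping `pseudo_box_aug: 1` and `refine_pseudo_box: 1`; the similarity-based
# variants additionally gain the `statistic_mode` / `width_ratio` / `window_size` /
# `top_frames` block. The `weight_sim` refine file continuing below follows the
# first, minimal pattern.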
+caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ad1160b06416b51bd4e728eef5e6225f023796c0 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..f56ac030b287f6c0a806833b546d90a9d8fe9670 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..79f9caa36975efda224cb605af412efda721e7dc --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml 
b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..c748cd44f1b9ea7607e4482da4af8444347d3f88 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl.yml b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..5543e4e259942b72d98f1fe16cd4311be93ef3c7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9804be364f78a4a8f26e30e0e6923558194edcd9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c4ef82922a7df99f37d1a626d4a89e8c9b95722 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..df92966691ed0fa33bc4b7417f6c0ade5b383869 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvcl.yml b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..c5298c707ab8887be611c86d522e855a8a5123a4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet.yml 
b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..d83ae5b6762ddc39bcdab2aedddf47a6ed8571d3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a7bf2a745aecc0b05232f717c81a97333ee55af3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_puyu_anet.json +vocab_size: 15249 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 
+transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2.yml b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..17b3bd0263edd713fc329bf1df7b539e2f160b3d --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..0f9ec30bf455a8a9d51bb867bdbc8e4d514c8006 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2_puyu.json +vocab_size: 13411 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 
+learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto_anet.yml b/anet_clip/backup/cfgs_base/howto/base_howto_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..3deec04627b419ff129a14bcf6ef5f8382bca7af --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 + + + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto_yc2.yml b/anet_clip/backup/cfgs_base/howto/base_howto_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..85343a3924a24e42054f963f220b2a3e93769070 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto_yc2.yml @@ -0,0 +1,62 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file: data/howto/vocabulary_howto_rate2.json +# vocab_size: 14432 + + + 
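# NOTE (annotation, not a line of the original diff): throughout these configs,
# each `dict_file` is paired with its own `vocab_size` (e.g.
# vocabulary_howto_rate2_yc2.json with 14538 above). When switching to one of the
# commented-out dictionaries, the matching size presumably has to be swapped in
# as well, i.e.:
#
#   dict_file: data/howto/vocabulary_howto_rate2.json
#   vocab_size: 14432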
+train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..92c6b64a0b9a276122b86cabe3ad428fa8fd6c8a --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..dfb930405a3050a89e929c9219635231b546d3cb --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..15f8524313eeff1620c67764a09bb5268d50c249 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c19e4b987a23b04a75b0eba01abfe0de360783c --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6141c44f52d807457f9cf0c759ae34f0ce6c024c --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f45381d945d502f5a7d421ac2e2d17a7abcd5d87 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + 
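+# NOTE (assumption, not shown in this diff): configs appear to inherit every
+# default from base_cfg_path and override only the keys listed here; check the
+# repo's config loader for the exact merge semantics.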
+visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..351d2d32411e886fb1dbbe52674c262b39e1ca77 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..691b7faef9301ea5d8b205f226ecdbfe7f0618c9 --- /dev/null +++ 
b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..73f5f0634fa1d4b00a2fb49f1793d72e67c16c87 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml 
b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..078379e88f4811421e58a8d7930e932ca6641e24 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a98bb3237ea032ebfca52ae34e59d88aa3592ffa --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f3192d4aa5b4456deba35f0f4e404a2b11fa7e00 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b6b1224667dae251754c76aeaccbf93a63893e54 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 
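+# NOTE (assumption from the key names): cap_dec_n_points and
+# cap_num_feature_levels look like deformable-attention sampling points and
+# feature levels for the caption decoder; verify against the model code.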
+cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5545fc09291a7589f59c8725f38a17144f43f826 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c202054206ca67c6885cf4a34bf12a2c9eb163ef --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebd15f0c193ea72d878905401027fd28a330a6e9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dd87d3485fe65777b79be423ffe881796fe879 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 
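+# NOTE: refine_pseudo_box: 0 keeps this variant unrefined; the matching
+# *_refine.yml files below set it to 1 and add statistic_mode, width_ratio,
+# window_size and top_frames for the refinement pass.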
+refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9bf87a23994a2adb512b6fb1d1a2188f70de82a6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..271b44b03d385f7ad93fdfff959e8b41486451df --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 
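+# NOTE (assumption from the key names): with pseudo_box_aug: 1, up to
+# pseudo_box_aug_num jittered copies of each pseudo box are generated, with
+# the jitter magnitude bounded by pseudo_box_aug_ratio; confirm in the code.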
+pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9779a1898513b16aa1c2aaa57f4eb6255b2f9253 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..996e5360fc812f1943959c0bc468974117a97a94 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] 
+text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..c56573da93d2d948eae7d723904a9243498ac484 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4940aa43eec6d5fcb08905bac7892f10b28c7549 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f373f5e6dfc0a586f104777611c18ad10dcddb9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..e28db709d1590f546524626df9b13676034d0489 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] 
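+# NOTE: the first entry holds HowTo100M pretraining features, the second the
+# target dataset (YouCook2); the *_val lists below keep only the target-dataset
+# folders, presumably because validation runs on the target split alone.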
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f8bf9b58b304ea4bc7becf5368d3025b192e5ea5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c56556156aa50810c468bb22a2d00ace2213860 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml @@ -0,0 +1,46 @@ 
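+# NOTE: this _v0 file is byte-identical to the preceding
+# howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml (both diffs record
+# blob 4bc67bf5e3278f9c16228a024efbf3ffa703b854); it looks like a kept snapshot.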
+id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ac92f31028eece0c85e6cc642876b3fee015063 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..456af3710b005fecd7848bf8bfe73e7de8dd58df --- 
/dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b5ab9e0bdfca3c12d8c932e52a0e0e20bf6e759a --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml 
b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..c6cdee3f0a43f9eee2898c610868aea88103ad9a --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e32f2b3092a0f11c35bf4a2ab0ab62172c23815 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml 
b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9205bfd2568d41ec26689b63fcdb82f30f1e0c7b --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,48 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + + + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ed0b046f4922fec52231c7c0d5c551cff82f0cc --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b40c179f05dff58d76a601a232e6fada42f29505 --- /dev/null +++ 
b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..32e237518518df60263b914b65d18dba1d0b8f46 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1dcb3b5b547fda7ed525234c4f97976badc60c04 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6f775d16de8d0ca478c89310cf56ae9cc12b6d6e --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..04d7bda3fde5f365542bbd033cee609457c89604 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebb97f21d137b0826c96dfea0a86e234195d39be --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..47aa1ebe8a83f7375281ca97c736b47b312d7806 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 
+pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86ad0779edfff76fd33862932cd3b534902be794 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..11cbc212d5837924bc8afb7d4635427be51ee216 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 
+window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..0dcf153ed24336b039f3e288035937de41f36a94 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..424cf659ab8edb6fc4b81364ce47a2d568f65c07 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 
4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..53a57713fe15fb61233549dc5ecb309e986a5508 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..93346fbdee8e6aaf398c9c429b91cc825377c9aa --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml 
b/anet_clip/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..f5f9a193a786aad4960447288edf675aecb58129 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dbabf6d032f5848991215aec06c7fcef0fb711 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7a064f9627e6d2f00667c740f27f3564ce16d63 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml @@ -0,0 +1,32 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: align +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef7c8924196f230fbf7abf4bd2aada4d8c813275 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + 
+use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..355c7d66081451b3513304f8b81126fd1976f918 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5121b6cf76c3b4966c382f4cfca11c71891a721c --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a95a0e782daf8c2f799f90d93aa95a6eceb015b2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..59967b4b1bc17087eda8b17e520c3e83e153e699 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..178db2ad770c80e3d3d365842e7df6f2a24c0fee --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7555f552813b710ea274c5068b4b4b46b15e00e8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..726c043ff0300b678776e8391d1bb3ed962307cf --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..71be5f3eb1240f38e131b7600e58cf1a1b3f7b3d --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc03cac62cb816ce7b98e9fcf4ff96167d03b682 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d054ac67dc4a7765def67b554b1c520c04cd18da --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..27e89d7b820b566c825bf2be4e6164f218664588 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..39dfc692b19e2acf130606f314214410f0720990 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..fe6cba647bb5d104b990a3415706d56971426b1f --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml 
b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..05c7d7053dd25cb4a3e06340268c29a14d7dc05e --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..40fd330d45cf07a8e34c442d56c1339fa237c5e1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd4ead981978c54801c70e6dec9fc4154f6fcf40 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86b411963f0ab337fa843b01a49b743c55d0f4ba --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4b959243136e53599aa026ae7a386d9ae8ce41a --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..259b30520a7d6eb411b14497d97e2b47939933ae --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..59ad8e64a36d46297e604044ab9fa8c0fe8bf3e1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3f8a497a405c767f138ba08ec811b417d62e5674 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..0aebca8432993dd31f123e618c40a701da8ed968 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..29a1b716404048c53360c2e95eb69510f03eb1e9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..9c2142a150ea00790b8c18556d0004bfa89afae8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + 
+use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b624be12c30a6132baf378a696b5d85949754b5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..3aedf7494857125480dd5c5dc50bd3a5cf9f4260 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml
new file mode 100644 index 0000000000000000000000000000000000000000..5104cde9760d48a7f9fa23cb1a04b825c18f3844 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8bca42c4f97b02f9f9bb5f73f37c7660a2c17f64 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f75a379f584c0e1c6937f7382106b41aa13de36b --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..70bb98540afb82ad228fd38386b705eac0186b43 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..377d0315f95c91a695c3452a623a2178f36f8b5e --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a43f4fbb5db27f501c2e18f01a4ce609982ca832 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..edb891d5da98423d456c7b80ab35cc9a50143577 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..b51463e36ec85d9216a6644801548bae016228fe --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml new file mode 100644 index 
0000000000000000000000000000000000000000..349183f42fc665a103e3b1b628b6f055bfbaee2d --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..87138b61dc4b554deebc0245686a152f593825fc --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file: 'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/voc_tasty_14.json +vocab_size: 14670 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 # capped at 20 here; 42 is the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml new file mode 100644 index 0000000000000000000000000000000000000000..1a82a7274fb3f956c8545096bdf86e3e1f9c0468 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file:
'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/vocabulary_tasty.json +vocab_size: 30171 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 42 # 42 is the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml b/anet_clip/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..d14b206d40c4f2399400913d1ff15b8659b575b9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/vlep/captiondata/vlep_meta.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/vlep/vlep_vocabulary_rate2_yc2.json +vocab_size: 4491 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cccf2ec29dc7c513d8b4cc90d4a6f6a3fabbc28d --- /dev/null +++ b/anet_clip/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,44 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/vlep/base_vlep-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/output/vlep_clip_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/vlep/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..00399b0c2f2a021a7476750b003026045d776cd1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml @@ -0,0 +1,20 @@ +id: yc2_UniVL_pdvc +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] + +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..ab29c8850e08c4549496f768a65f7ff4d08f33ba --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml @@ -0,0 +1,19 @@ +id: yc2_ViP_pdvc_norm +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text/'] +feature_dim: 512 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8ecdedb6ef1d7787c74040ef64b644bfb98d956 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml @@ -0,0 +1,29 @@ +id: yc2_abox_ViP_pseudo_similarity +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text'] +feature_dim: 512 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_type: similarity +use_anchor: 0 +pretrained_language_model: CLIP-ViP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c15a2a7c6e8dea1452f91f26dda6fe9fb6ebe8f7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..217693d7afa3593227e2d368fdb2552cd9371369 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..13eef191acd1177c2d4e7bdc64f2b755e80ab5e5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + 
+visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..88bd8b2fec5674da63b5171fa4c7bfeee0426fc9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4846c148573235eb6aa047cb024e6c78a4e1cba2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml 
b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4b329f14bec0848ba829c1b4048e7fe22fb46e83 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d62fd00faf076f269351f19da5083817b419ff0 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e25901b1bf82142d41e05343f65e1ba59b8d908b --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL 
+disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..77143cc57f11432ea5001da37c0014eb1696acc2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..87ff91cad79953a42bb1b582e8170779d1e147c7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..388cbc2b2527190f4ccf17c7006ec9adee33ae5e --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8b7d7cecca9a90d08a1fe516e115891645fb0b6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c49641fdda1e81d5e713e9b615d3d1186a7fea7d --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 
0000000000000000000000000000000000000000..dbb37176adcdc33825abe0ed4a943588ad157f4a --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b271a74c0f4f3d976410fcc2b607d12056124d08 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d91010f653fe7c8f52caead5e7737a8ea102fd9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL 
+disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..bed129c0b6368d63e430310d7caa0ce4a633e329 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca26e6a13b9df3b80e91cf34b8668676895d6214 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c5086a3c1ab5482fd8595d66ebd5cecd3da4502c --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ec5cdeb72ada972c4b0ec906ed3d96060a7e018 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d56829b138e152c73a8513f85f3f086c40ae838 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml 
b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042ae96b01a2c741fe1304441db5bd0ca14109b4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e9f47c5587ae99e197c9e4c2d87ccc4545923143 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..78f0529a85faecb3ca48833e213e1be64072439a --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 
+pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..615d745a91273d92d158ecbd28e9eb7e5ec77640 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f2fe9580c80c0a6c19ebd865cc9625a7f3b997ec --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a3866db1b5556c8b1c7a3f37a37686bdc2170a13 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..707926c6592afdac874f5ba9bafb7c3855e9b18e --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbc2e0503505aa158c62d8f3687cce87a45ece37 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ff93b47407b09e7af54d101c6dfcab1d359c1d3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8bdddf2857f8baee722f798460eee584a75e07e --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..35cf1c1bee6180fdd84cf580e3266c24f80bfb2b --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim 
+use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c68cd4381b5ddc5447b4da98c968702fcfad52d2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1dcf629977378040564bf4b9256f66fe8a76282 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6768351e467f9cdcfb4cf503621faee1d28da7d5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..48c7d7ff81d64b9b885be7e8c9c114d5b3a177c6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a2880d6336436d6d2da1330e3835323340f5ddd7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..96d7bc4b0db3ea7647ac39f487d43fe96d1b9846 --- /dev/null 
+++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dd2d07dd9d060137eeb3f135a7e2a420d110fdf --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6043b9b8e94f7f09c976fd10fd340ce0370dbc21 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 
+att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef3bd5c6fccf6e8fa460e56fbc89d5cda0ad5a16 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ba9e35266c5ecce655d98fbda735fffd5505ab3c --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..373ef4c276a633c7923d9cdf106d81863573f378 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 
+pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e6e781eb010da1f9c67a8362776119348f972795 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c2b90ed1817ba9b5af6f71c7aa48499550fed905 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..71cdb3e5f6a305eeefcf987c447ad38ce2c0b9eb --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml @@ -0,0 
+1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..14e4add2192671c62c3c5ecab34be01db392ead2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..658a48fbf1a31ce7522f65e4f04453965f1a1130 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 
+cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8c2448ad587b99db12d4004259da8ad83c400547 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca9efe1df117e81a57d9f4c0b37dccef8ce071a4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ae2b3a1874058268562185df28b4568088518bd --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f416921a9d59d98d643118774215085f50e4adf --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a43d8c6e42cb71191f4db57266b12c2f8ff1df7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ce8c21fb91989e7b37c1504989be65eaa5c17c1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..25c75deb4061ea7fe6c59f5d63d26b85c332f8ce --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c1a0e79d75681650dce5753ea1934f39e05931ba --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 
+pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e6f5990069b64bd922d068c9721144ee3ed8467 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a061bab74963587e8c148e67c47ba3e8d8e7bdc9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml new file mode 100644 
index 0000000000000000000000000000000000000000..6cf3256fca24a4cdaa0c9e8c89a4fed74edd684b --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e13936ee7c32a85bb8553de1bddf8dc85f6acdd7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..31d2af3cfa6d15ec3ce1067648dcb89cf410d309 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: 
standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1d5096ca8903c240faa1e45589e77786800bf4d9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c04663ab87edcd38ea5a0fa00c17d99f3203fc02 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca1d85d39c264126d0cdcad3fb4ef6dcd9d78249 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e431de6395e6a745cf9c9e5f560621c1ef911015 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d4b9dec009027bfad79d96f94545c19465146271 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..e66d364a384fa467573af7703d97baf23098c9a2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..60d558dd0b51cbe8d184681d7227c91e76246540 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc66b3cbff2550bf0264a79dd43d6b93ab7256a0 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml @@ -0,0 +1,13 @@ +id: yc2_tsn_pdvc +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9a1c528c5c792081cbb4873983306c4268a23d55 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: yc2_tsn_pdvc_gt +base_cfg_path: cfgs_base/yc2_tsn_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git 
a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml new file mode 100644 index 0000000000000000000000000000000000000000..79ef87700f600af96cb41f1953b4fb1da336c8ec --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml @@ -0,0 +1,16 @@ +id: yc2_tsn_pdvc_prior +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml new file mode 100644 index 0000000000000000000000000000000000000000..14941f50b2699cc25e74ee388bfe086ae0bda74d --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml @@ -0,0 +1,18 @@ +id: yc2_tsn_pdvc_prior_add +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +prior_manner: add + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..1420f8abf88d8bbdd6c9cf05454f0949a9fb6c44 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml @@ -0,0 +1,55 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..435e85fc3946b15c389de755987b34f8bd75d469 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml @@ -0,0 +1,57 @@ +id: yc2_tsn_pdvcl_gt + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' 
+val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.0001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +#max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 +transformer_input_type: gt_proposals \ No newline at end of file diff --git a/anet_clip/backup/change_config_add.py b/anet_clip/backup/change_config_add.py new file mode 100644 index 0000000000000000000000000000000000000000..610c71dbf03a1817cda08454698805982df1f985 --- /dev/null +++ b/anet_clip/backup/change_config_add.py @@ -0,0 +1,78 @@ +import os +import yaml +import argparse + +# add dryrun option +parser = argparse.ArgumentParser(description='Change config files') +parser.add_argument('--dryrun', action='store_true', help='print the planned file changes without writing anything') +args = parser.parse_args() + + + + + +# Define the folder containing YAML files +folder_path = 'cfgs_ref' +# folder_path = 'cfgs_base/anet' +# folder_path = 'cfgs' +file_filter = 'yc2' + + + +# Define the string to find and the replacement string +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video' +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj' +# find_string = 'data/yc2/captiondata/yc2' +# find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text" +find_string = "UniVL_refine" +# find_string = "pdvc_mode: 0" + +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual' +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text' +# replace_string = 'data/tasty/captiondata/tasty' +# replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml" +replace_string = "CLIP_refine" +# replace_string = "pdvc_mode: 1" + +old_name = 'univl' +new_name = 'clip' + +def replace_yaml(yaml_file_path, new_file_path, old_string, new_string): + # Read the YAML file as text + with open(yaml_file_path, 'r') as file: + yaml_text = file.read() + + # Replace a string (e.g., 'old_string') with another string (e.g., 'new_string') + + yaml_text = yaml_text.replace(old_string, new_string) + + # Save the modified text back to a YAML file + with open(new_file_path, 'w') as file: + file.write(yaml_text) + + # # Load the modified YAML data (optional) + # modified_yaml_data = yaml.safe_load(yaml_text) + +# You can now work with the modified_yaml_data as needed + +filelist = os.listdir(folder_path) +# Iterate over the files in the folder +for filename in filelist: + if file_filter not in filename: + continue + # breakpoint() + if (filename.endswith('.yaml') or filename.endswith('.yml')) and old_name in filename: + # breakpoint() + file_path = 
os.path.join(folder_path, filename) + if old_name == '': + new_filename = filename.replace('.yml', '_{}.yml'.format(new_name)) + else: + new_filename = filename.replace(old_name, new_name) + new_file_path = os.path.join(folder_path, new_filename) + + if args.dryrun: + print("Dryrun: {} -> {}".format(file_path, new_file_path)) + else: + replace_yaml(file_path, new_file_path, find_string, replace_string) + +print("String replacement completed.") \ No newline at end of file diff --git a/anet_clip/backup/demo.py b/anet_clip/backup/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e3ab4946905f140f377d120a14deff85f4622f --- /dev/null +++ b/anet_clip/backup/demo.py @@ -0,0 +1,44 @@ +import numpy as np + +# Example similarity matrix with shape [10, 200] +similarity_matrix = np.random.rand(10, 200) + +# Example range of indices for each step (stored in center and width arrays) +center = np.random.randint(0, 100, size=(10,)) +width = np.random.randint(10, 20, size=(10,)) + +# Calculate the start and end indices for each step +start_indices = np.clip(center - width // 2, 0, similarity_matrix.shape[1]) +end_indices = np.clip(center + width // 2, 0, similarity_matrix.shape[1]) + +# Generate column indices for each range +col_indices = np.arange(similarity_matrix.shape[1]) + +# Get topk values and corresponding indices +topk = 5 +topk_values = [] +topk_indices = [] + +for start, end in zip(start_indices, end_indices): + # Slice the similarity matrix within the specified range + range_values = similarity_matrix[:, start:end] + + # Find the indices of the topk values within the range + sorted_indices = np.argsort(range_values, axis=1)[:, -topk:] + sorted_indices += start # Adjust indices to the absolute position + + # Flatten and concatenate the indices; row ids must be repeated topk times so both + # index arrays have rows*topk entries (np.ravel_multi_index cannot broadcast a + # length-10 array against a length-50 one) + row_indices = np.arange(len(sorted_indices))[:, np.newaxis] + indices_flat = np.ravel_multi_index((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()), similarity_matrix.shape) + + # Append topk values and indices + topk_values.append(np.take(similarity_matrix, indices_flat)) + topk_indices.append(np.column_stack((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()))) + +# Convert lists to arrays +topk_values = np.array(topk_values) +topk_indices = np.array(topk_indices) + +print("Topk values within the specified range:", topk_values) +print("Topk indices within the specified range:", topk_indices) + diff --git a/anet_clip/backup/eval.py b/anet_clip/backup/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c59801e0e5a9e72ce22521699e53d796efd49b --- /dev/null +++ b/anet_clip/backup/eval.py @@ -0,0 +1,146 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import json +import os +import sys +import torch +import numpy as np +import time +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + +from eval_utils import evaluate +from pdvc.pdvc import build +from misc.utils import create_logger +from data.video_dataset import PropSeqDataset, collate_fn +from torch.utils.data import DataLoader +from os.path import basename +import pandas as pd + +def create_fake_test_caption_file(metadata_csv_path): + out = {} + df = pd.read_csv(metadata_csv_path) + for i, row in df.iterrows(): + 
out[basename(row['filename']).split('.')[0]] = {'duration': row['video-duration'], "timestamps": [[0, 0.5]], "sentences":["None"]} + fake_test_json = '.fake_test_json.tmp' + json.dump(out, open(fake_test_json, 'w')) + return fake_test_json + +def main(opt): + folder_path = os.path.join(opt.eval_save_dir, opt.eval_folder) + if opt.eval_mode == 'test': + if not os.path.exists(folder_path): + os.makedirs(folder_path) + logger = create_logger(folder_path, 'val.log') + if opt.eval_model_path: + model_path = opt.eval_model_path + infos_path = os.path.join('/'.join(opt.eval_model_path.split('/')[:-1]), 'info.json') + else: + model_path = os.path.join(folder_path, 'model-best.pth') + infos_path = os.path.join(folder_path, 'info.json') + + logger.info(vars(opt)) + + with open(infos_path, 'rb') as f: + logger.info('load info from {}'.format(infos_path)) + old_opt = json.load(f)['best']['opt'] + + for k, v in old_opt.items(): + if k[:4] != 'eval': + vars(opt).update({k: v}) + + opt.transformer_input_type = opt.eval_transformer_input_type + + if not torch.cuda.is_available(): + opt.nthreads = 0 + # Create the Data Loader instance + + if opt.eval_mode == 'test': + opt.eval_caption_file = create_fake_test_caption_file(opt.test_video_meta_data_csv_path) + opt.visual_feature_folder = opt.test_video_feature_folder + + val_dataset = PropSeqDataset(opt.eval_caption_file, + opt.visual_feature_folder, opt.text_feature_folder, + opt.dict_file, False, opt.eval_proposal_type, + opt) + loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn) + + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = val_dataset.translator + + + + if not os.path.exists(model_path): + raise AssertionError('File {} does not exist'.format(model_path)) + + logger.debug('Loading model from {}'.format(model_path)) + loaded_pth = torch.load(model_path, map_location=opt.eval_device) + epoch = loaded_pth['epoch'] + + # loaded_pth = transfer(model, loaded_pth, model_path+'.transfer.pth') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + + model.to(opt.eval_device) + + if opt.eval_mode == 'test': + out_json_path = os.path.join(folder_path, 'dvc_results.json') + evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=True) + + + else: + out_json_path = os.path.join(folder_path, '{}_epoch{}_num{}_alpha{}.json'.format( + time.strftime("%Y-%m-%d-%H-%M-%S_", time.localtime()) + str(opt.id), epoch, len(loader.dataset), + opt.ec_alpha)) + caption_scores, eval_loss = evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=False) + # breakpoint() + avg_eval_score = {key: np.array(value).mean() for key, value in caption_scores.items() if key != 'tiou'} + # avg_eval_score2 = {key: np.array(value).mean() * 4917 / len(loader.dataset) for key, value in caption_scores.items() if key != 'tiou'} + + # logger.info( + # '\nValidation result based on all 4917 val videos:\n {}\n avg_score:\n{}'.format( + # caption_scores.items(), + # avg_eval_score)) + + logger.info( + '\nValidation result based on {} available val videos:\n avg_score:\n{}'.format(len(loader.dataset), + avg_eval_score)) + + logger.info('saving results json to 
{}'.format(out_json_path)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--eval_save_dir', type=str, default='save') + parser.add_argument('--eval_mode', type=str, default='eval', choices=['eval', 'test']) + parser.add_argument('--test_video_feature_folder', type=str, nargs='+', default=None) + parser.add_argument('--test_video_meta_data_csv_path', type=str, default=None) + parser.add_argument('--eval_folder', type=str, required=True) + parser.add_argument('--eval_model_path', type=str, default='') + parser.add_argument('--eval_tool_version', type=str, default='2018', choices=['2018', '2021']) + parser.add_argument('--eval_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--eval_proposal_type', type=str, default='gt') + parser.add_argument('--eval_transformer_input_type', type=str, default='queries', choices=['gt_proposals', 'prior_proposals','queries']) + parser.add_argument('--gpu_id', type=str, nargs='+', default=['0']) + parser.add_argument('--eval_device', type=str, default='cuda') + parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all']) + opt = parser.parse_args() + + #breakpoint() + + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + if True: + torch.backends.cudnn.enabled = False + main(opt) diff --git a/anet_clip/backup/eval_utils.py b/anet_clip/backup/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd727ecebd0364fe9ad45d94f582fdcb17d54b --- /dev/null +++ b/anet_clip/backup/eval_utils.py @@ -0,0 +1,241 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import collections +import torch +import numpy as np +import json +from collections import OrderedDict +from tqdm import tqdm +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) + + +from densevid_eval3.eval_soda import eval_soda +from densevid_eval3.eval_para import eval_para +from densevid_eval3.eval_dvc import eval_dvc + +def calculate_avg_proposal_num(json_path): + data = json.load(open(json_path)) + return np.array([len(v) for v in data['results'].values()]).mean() + +def convert_tapjson_to_dvcjson(tap_json, dvc_json): + data = json.load(open(tap_json, 'r')) + data['version'] = "VERSION 1.0" + data['external_data'] = {'used:': True, 'details': "C3D pretrained on Sports-1M"} + + all_names = list(data['results'].keys()) + for video_name in all_names: + for p_info in data['results'][video_name]: + p_info['timestamp'] = p_info.pop('segment') + p_info['proposal_score'] = p_info.pop('score') + p_info['sentence_score'] = p_info.pop('sentence_score', 0) + data['results']["v_" + video_name] = data['results'].pop(video_name) + json.dump(data, open(dvc_json, 'w')) + + +def convert_dvcjson_to_tapjson(dvc_json, tap_json): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + event_num = len(data[video_name]) + timestamps = [data[video_name][i]['timestamp'] for i in range(event_num)] + sentences = [data[video_name][i]['sentence'] for 
i in range(event_num)] + for i, timestamp in enumerate(timestamps): + score = data[video_name][i].get('proposal_score', 1.0) + video_info.append({'segment': timestamp, 'score': score, 'sentence': sentences[i], 'sentence_score': data[video_name][i].get('sentence_score', 0)}) + out['results'][video_name[2:]] = video_info + json.dump(out, open(tap_json, 'w')) + + +def convert_gtjson_to_tapjson(gt_json, tap_json): + data = json.load(open(gt_json, 'r')) + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + timestamps = data[video_name]['timestamps'] + sentences = data[video_name]['sentences'] + for i, timestamp in enumerate(timestamps): + video_info.append({'segment': timestamp, 'score': 1., 'sentence': sentences[i]}) + out['results'][video_name[2:]] = video_info + with open(tap_json, 'w') as f: + json.dump(out, f) + + +def get_topn_from_dvcjson(dvc_json, out_json, top_n=3, ranking_key='proposal_score', score_thres=-1e8): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + all_names = list(data.keys()) + num = 0 + bad_vid = 0 + for video_name in all_names: + info = data[video_name] + new_info = sorted(info, key=lambda x: x[ranking_key], reverse=True) + new_info = [p for p in new_info if p[ranking_key] > score_thres] + new_info = new_info[:top_n] + out['results'][video_name] = new_info + num += len(new_info) + if len(new_info) == 0: + bad_vid += 1 + out['results'].pop(video_name) + print('average proposal number: {}'.format(num / len(all_names))) + print('bad videos number: {}'.format(bad_vid)) + print('good videos number: {}'.format(len(out['results']))) + with open(out_json, 'w') as f: + json.dump(out, f) + + +def eval_metrics(dvc_filename, gt_filenames, para_gt_filenames, alpha=0.3, ranking_key='proposal_score', rerank=False, dvc_eval_version='2018', transformer_input_type='queries'): + score = collections.defaultdict(lambda: -1) + # top_n = 3 + # top_n_filename = dvc_filename + '.top{}.json'.format(top_n) + # get_topn_from_dvcjson(dvc_filename, top_n_filename, top_n=top_n, ranking_key=ranking_key) + # dvc_score = eval_dvc(json_path=top_n_filename, reference=gt_filenames) + # dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + # dvc_score.update(eval_soda(top_n_filename, ref_list=gt_filenames)) + # dvc_score.update(eval_para(top_n_filename, referneces=para_gt_filenames)) + # for key in dvc_score.keys(): + # score[key] = dvc_score[key] + if transformer_input_type == 'prior_proposals': + dvc_score = eval_para(dvc_filename, referneces=para_gt_filenames) + score.update(dvc_score) + #breakpoint() + return score + + else: + if rerank: + dvc_filename = reranking(dvc_filename, alpha=alpha, temperature=2.0) + dvc_score = eval_dvc(json_path=dvc_filename, reference=gt_filenames, version=dvc_eval_version) + dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + dvc_score.update(eval_soda(dvc_filename, ref_list=gt_filenames)) + dvc_score.update(eval_para(dvc_filename, referneces=para_gt_filenames)) + score.update(dvc_score) + return score + + +def save_dvc_json(out_json, path): + with open(path, 'w') as f: + out_json['valid_video_num'] = len(out_json['results']) + out_json['avg_proposal_num'] = np.array([len(v) for v in out_json['results'].values()]).mean().item() + json.dump(out_json, f) 
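+# A back-of-the-envelope check of the joint score that reranking() below computes (illustrative numbers, not from any run): +# with alpha=0.3 and temperature=2.0, a proposal with proposal_score=0.9 and sentence_score=-6.0 over a 12-word sentence gets +#     sent = -6.0 / (12 ** 2.0 + 1e-5) ≈ -0.0417 +#     joint_score = 0.3 * (-0.0417) + 0.9 ≈ 0.8875 +# Candidates are then sorted by joint_score, truncated to the top candidate's pred_event_count, and re-sorted by timestamp.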
+ +def reranking(p_src, alpha, temperature): + print('alpha: {}, temp: {}'.format(alpha, temperature)) + d = json.load(open(p_src)) + d_items = list(d['results'].items()) + for k,v in d_items: + if True: + sent_scores = [p['sentence_score'] / (float(len(p['sentence'].split()))**(temperature) + 1e-5) for p in v] + prop_score = [p['proposal_score'] for p in v] + joint_score = alpha * (np.array(sent_scores)) + (np.array(prop_score)) + for i,p in enumerate(v): + p['joint_score'] = joint_score[i] + v = sorted(v, key=lambda x: x['joint_score'], reverse=True) + topN = v[0]['pred_event_count'] + v = v[:topN] + v = sorted(v, key=lambda x: x['timestamp']) + d['results'][k] = v + save_path = p_src+'_rerank_alpha{}_temp{}.json'.format(alpha, temperature) + save_dvc_json(d, save_path) + return save_path + + +def evaluate(model, criterion, postprocessors, loader, dvc_json_path, logger=None, args=None, score_threshold=0, + alpha=0.3, dvc_eval_version='2018', device='cuda', debug=False, skip_lang_eval=False): + out_json = {'results': {}, + 'version': "VERSION 1.0", + 'external_data': {'used:': True, 'details': None}} + opt = loader.dataset.opt + + loss_sum = OrderedDict() + with torch.set_grad_enabled(False): + for dt in tqdm(loader, disable=opt.disable_tqdm): + # valid_keys = ["video_tensor", "video_length", "video_mask", "video_key"] + # dt = {key: value for key, value in dt.items() if key in valid_keys} + dt = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt = collections.defaultdict(lambda: None, dt) + + dt['video_target'] = [ + {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # output, loss = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + output, _ = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + orig_target_sizes = dt['video_length'][:, 1] + + weight_dict = criterion.weight_dict + # Huabin comment this line (anything about 'loss') to avoid reporting losses during evaluation + # final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + + # Huabin comment this line to avoid reporting losses during evaluation + # for loss_k, loss_v in loss.items(): + # loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item() + # loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + results = postprocessors['bbox'](output, orig_target_sizes, loader) + + batch_json = {} + for idx, video_name in enumerate(dt['video_key']): + segment = results[idx]['boxes'].cpu().numpy() + raw_boxes = results[idx]['raw_boxes'].cpu().numpy() + # pdb.set_trace() + #breakpoint() + batch_json[video_name] = [ + { + "timestamp": segment[pid].tolist(), + "raw_box": raw_boxes[pid].tolist(), + "proposal_score": results[idx]['scores'][pid].item(), + "sentence": results[idx]['captions'][pid], + "sentence_score": results[idx]['caption_scores'][pid], + 'query_id': results[idx]['query_id'][pid].item(), + 'vid_duration': results[idx]['vid_duration'].item(), + 'pred_event_count': results[idx]['pred_seq_len'].item(), + } + for pid in range(len(segment)) if results[idx]['scores'][pid].item() > score_threshold] + out_json['results'].update(batch_json) + if debug and len(out_json['results']) > 5: + break + + save_dvc_json(out_json, dvc_json_path) + + if skip_lang_eval: + return None, None + + # Huabin comment this line to avoid reporting losses during evaluation + # for k in loss_sum.keys(): + # loss_sum[k] = np.round(loss_sum[k] / (len(loader) + 
1e-5), 3).item() + # logger.info('loss: {}'.format(loss_sum)) + scores = eval_metrics(dvc_json_path, + gt_filenames=opt.gt_file_for_eval, + para_gt_filenames=opt.gt_file_for_para_eval, + alpha=alpha, + rerank=(opt.count_loss_coef > 0), + dvc_eval_version=dvc_eval_version, + transformer_input_type=opt.transformer_input_type + ) + + out_json.update(scores) + save_dvc_json(out_json, dvc_json_path) + # return scores, loss_sum + return scores, [] diff --git a/anet_clip/backup/misc/MIL_loss.py b/anet_clip/backup/misc/MIL_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a234e01695ca8871b045a0ba31b13e9e79883a --- /dev/null +++ b/anet_clip/backup/misc/MIL_loss.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmdet.models.losses import accuracy +from mmdet.models.losses.cross_entropy_loss import _expand_onehot_labels +from .utils import weight_reduce_loss + + +class MILLoss(nn.Module): + + def __init__(self, + # use_binary=True, + # reduction='mean', + binary_ins=False, + loss_weight=1.0, eps=1e-6, loss_type='gfocal_loss'): + """ + Args: + use_binary (bool, optional): Whether the prediction is + used for binary cross entropy. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + """ + super(MILLoss, self).__init__() + # self.use_binary = use_binary + # self.reduction = reduction + self.loss_weight = loss_weight + # if self.use_sigmoid: + # self.loss_cls = CrossEntropyLoss(use_sigmoid=True, loss_weight=loss_weight) + self.eps = eps + self.loss_type = loss_type + self.binary_ins = binary_ins + + def gfocal_loss(self, p, q, w=1.0): + l1 = (p - q) ** 2 + l2 = q * (p + self.eps).log() + (1 - q) * (1 - p + self.eps).log() + return -(l1 * l2 * w).sum(dim=-1) + + def forward(self, bag_cls_prob, bag_ins_outs, labels, valid, weight=None): + """ + bag_cls_prob: (B, N, C), + bag_ins_outs: (B, N, C*2/C) + valid: (B, N, 1/C) + labels: (B, ) + Returns: + """ + if self.binary_ins: + assert bag_ins_outs.shape[-1] / bag_cls_prob.shape[-1] == 2 + else: + assert bag_ins_outs.shape[-1] == bag_cls_prob.shape[-1] + + B, N, C = bag_cls_prob.shape + prob_cls = bag_cls_prob.unsqueeze(dim=-1) # (B, N, C, 1) + prob_ins = bag_ins_outs.reshape(B, N, C, -1) # (B, N, C, 2/1) + prob_ins = prob_ins.softmax(dim=1) * valid.unsqueeze(dim=-1) + prob_ins = F.normalize(prob_ins, dim=1, p=1) + prob = (prob_cls * prob_ins).sum(dim=1) + acc = accuracy(prob[..., 0], labels) + + label_weights = (valid.sum(dim=1) > 0).float() + labels = _expand_onehot_labels(labels, None, C)[0].float() + num_sample = max(torch.sum(label_weights.sum(dim=-1) > 0).float().item(), 1.) 
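+        # Shape check: prob_ins has been softmax-normalized over the N proposals of each bag and masked by `valid`, +        # so prob is (B, C, 2/1): each proposal's class probability weighted by its normalized instance weight and +        # summed over N. num_sample counts bags with at least one valid proposal and is the averaging factor below.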
+ + if prob.shape[-1] == 1: + prob = prob.squeeze(dim=-1) + elif prob.shape[-1] == 2: # with binary ins + pos_prob, neg_prob = prob[..., 0], prob[..., 1] + prob = torch.cat([pos_prob, neg_prob]) + neg_labels = labels.new_zeros(labels.shape) + labels = torch.cat([labels, neg_labels]) + label_weights = torch.cat([label_weights, label_weights]) + + if self.loss_type == 'gfocal_loss': + loss = self.gfocal_loss(prob, labels, label_weights) + if weight is not None: + # modified by fei ##############################################################3 + weight=weight.squeeze(-1) + elif self.loss_type == 'binary_cross_entropy': + # if self.use_sigmoid: + # method 1: + # loss = self.loss_cls( + # prob, + # labels, + # label_weights, + # avg_factor=avg_factor, + # reduction_override=reduction_override) + # method 2 + prob = prob.clamp(0, 1) + # modified by fei ##############################################################3 + loss = F.binary_cross_entropy(prob, labels.float(), None, reduction="none") + else: + raise ValueError() + loss = weight_reduce_loss(loss, weight, avg_factor=num_sample) * self.loss_weight + return loss, acc, num_sample \ No newline at end of file diff --git a/anet_clip/backup/misc/__pycache__/utils.cpython-38.pyc b/anet_clip/backup/misc/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b3a8d5ea6440b3f900ca9cd1815a5cf81f0534c Binary files /dev/null and b/anet_clip/backup/misc/__pycache__/utils.cpython-38.pyc differ diff --git a/anet_clip/backup/misc/build_vocab.py b/anet_clip/backup/misc/build_vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..181c9ff27a7edc9d57e16cba107a87857062e24b --- /dev/null +++ b/anet_clip/backup/misc/build_vocab.py @@ -0,0 +1,66 @@ +# coding:utf-8 +import json + +# file_path_list = ["data/captiondata/train_modified.json", "data/captiondata/val_1.json", "data/captiondata/val_2.json"] +file_path_list = ["data/captiondata/yc2/yc2_train.json", "data/captiondata/yc2/yc2_val.json"] + +count_threshold = 2 # 4 for anet, 2 for youcook2 +# output_path = './data/vocabulary_activitynet.json' +output_path = './data/vocabulary_youcook2.json' + +mark = [',', ':', '!', '_', ';', '-', '.', '?', '/', '"', '\\n', '\\'] + +count_vocal = {} + +for file_path in file_path_list: + data = json.load(open(file_path)) + video_ids = data.keys() + print('video num of ' + file_path.split('/')[-1], len(video_ids)) + for video_id in video_ids: + sentences = data[video_id]["sentences"] + for sentence in sentences: + for m in mark: + if m in sentence: + sentence = sentence.replace(m, " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + + sentence = sentence.lstrip() + sentence = sentence.rstrip() + sentence = sentence.lower() + sentence = sentence.split(" ") + length = len(sentence) + + # print(sentence) + for word in sentence: + # print(type(word)) + for m in word: + if m == ' ': + print('warning !') + word = word.replace(m, '') + if word == '': + print('warning !') + pass + count_vocal[word] = count_vocal.get(word, 0) + 1 + +print("total word:", sum(count_vocal.values())) +count_vocal[''] = 1e10 +count_vocal[''] = 1e10 +vocab = [word for word, n in count_vocal.items() if n >= count_threshold] +bad_word = [word for word, n in count_vocal.items() if n < count_threshold] +bad_count = sum(count_vocal[word] for word in bad_word) + +vocab.append('UNK') +print("number of vocab:", len(vocab)) +print("number of bad word:", len(bad_word)) 
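+# bad_count tallies the total corpus frequency of the below-threshold words, i.e. roughly how many caption tokens will map to UNK.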
+print("number of unks:", bad_count) + +itow = {i + 1: w for i, w in enumerate(vocab)} +wtoi = {w: i + 1 for i, w in enumerate(vocab)} +print(len(itow)) +print(len(wtoi)) + +json.dump({'ix_to_word': itow, + 'word_to_ix': wtoi}, open(output_path, 'w')) +print("saving vocabulary file to {}".format(output_path)) \ No newline at end of file diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6e18c06bca951f4d0ae6bc5e92a08175f68343c Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4f6c9c6fb2356fb3b50ae9390f74b21203aa9a4 Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8794fcb4c80bab0af2f4c0acf2e324518d3630a Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef902352c76a36b2fe11f4a84a4b6186c48b2831 Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/box_ops.py b/anet_clip/backup/misc/detr_utils/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7106ba6c48a3cc3827a4bd923b08c7c61213af --- /dev/null +++ b/anet_clip/backup/misc/detr_utils/box_ops.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Utilities for bounding box manipulation and GIoU. 
+""" +import torch +from torchvision.ops.boxes import box_area + +def box_cl_to_xy(x): + c, l = x.unbind(-1) + b = [c - 0.5 * l, c + 0.5 * l] + return torch.stack(b, dim=-1) + +def box_xy_to_cl(x): + x0, x1 = x.unbind(-1) + b = [(x0 + x1) / 2, (x1 - x0)] + return torch.stack(b, dim=-1) + +# modified from torchvision to also return the union +def box_iou(boxes1, boxes2): + area1 = boxes1[:, 1] - boxes1[:, 0] + area2 = boxes2[:, 1] - boxes2[:, 0] + lt = torch.max(boxes1[:, None, 0], boxes2[:, 0]) # [N,M,2] + rb = torch.min(boxes1[:, None, 1], boxes2[:, 1]) # [N,M,2] + inter = (rb - lt).clamp(min=0) # [N,M,2] + union = area1[:, None] + area2 - inter + iou = inter / (union + 1e-5) + return iou, union + + +def generalized_box_iou(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + + The boxes should be in [x0, y0, x1, y1] format + + Returns a [N, M] pairwise matrix, where N = len(boxes1) + and M = len(boxes2) + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 1:] >= boxes1[:, :1]).all() + assert (boxes2[:, 1:] >= boxes2[:, :1]).all() + iou, union = box_iou(boxes1, boxes2) + lt = torch.min(boxes1[:, None, 0], boxes2[:, 0]) + rb = torch.max(boxes1[:, None, 1], boxes2[:, 1]) + area = (rb - lt).clamp(min=0) # [N,M,2] + giou = iou - (area - union) / (area + 1e-5) + return giou \ No newline at end of file diff --git a/anet_clip/backup/misc/detr_utils/misc.py b/anet_clip/backup/misc/detr_utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..168603606353a959ca9cf6a39fbf2d7f9216e560 --- /dev/null +++ b/anet_clip/backup/misc/detr_utils/misc.py @@ -0,0 +1,989 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +# if float(torchvision.__version__[:3]) < 0.7: +# from torchvision.ops import _new_empty_tensor +# from torchvision.ops.misc import _output_size + + +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. 
+""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.nn as nn +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +if float(torchvision.__version__[:3]) < 0.5: + import math + # from torchvision.ops.misc import _NewEmptyTensorOp + def _check_size_scale_factor(dim, size, scale_factor): + # type: (int, Optional[List[int]], Optional[float]) -> None + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if not (scale_factor is not None and len(scale_factor) != dim): + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + def _output_size(dim, input, size, scale_factor): + # type: (int, Tensor, Optional[List[int]], Optional[float]) -> List[int] + assert dim == 2 + _check_size_scale_factor(dim, size, scale_factor) + if size is not None: + return size + # if dim is not 2 or scale_factor is iterable use _ntuple instead of concat + assert scale_factor is not None and isinstance(scale_factor, (int, float)) + scale_factors = [scale_factor, scale_factor] + # math.floor might return float in py2.7 + return [ + int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + ] +elif float(torchvision.__version__[:3]) < 0.7: + from torchvision.ops import _new_empty_tensor + from torchvision.ops.misc import _output_size + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def get_sha(): + cwd = os.path.dirname(os.path.abspath(__file__)) + + def _run(command): + return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() + sha = 'N/A' + diff = "clean" + branch = 'N/A' + try: + sha = _run(['git', 'rev-parse', 'HEAD']) + subprocess.check_output(['git', 'diff'], cwd=cwd) + diff = _run(['git', 'diff-index', 'HEAD']) + diff = "has uncommited changes" if diff else "clean" + branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) + except Exception: + pass + message = f"sha: {sha}, status: {diff}, branch: {branch}" + return message + + +def collate_fn(batch): + batch = 
list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], :img.shape[2]] = False + else: + raise ValueError('not supported') + return NestedTensor(tensor, mask) + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor], duration=None): + self.tensors = tensors + self.mask = mask + self.duration = duration + + def to(self, device, non_blocking=False): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device, non_blocking=non_blocking) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device, non_blocking=non_blocking) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def record_stream(self, *args, **kwargs): + self.tensors.record_stream(*args, **kwargs) + if self.mask is not None: + self.mask.record_stream(*args, **kwargs) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def get_local_size(): + if not is_dist_avail_and_initialized(): + return 1 + return int(os.environ['LOCAL_SIZE']) + + +def get_local_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return int(os.environ['LOCAL_RANK']) + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + args.dist_url = 'env://' + os.environ['LOCAL_SIZE'] = str(torch.cuda.device_count()) + elif 'SLURM_PROCID' in os.environ: + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = 
torch.cuda.device_count() + addr = subprocess.getoutput( + 'scontrol show hostname {} | head -n1'.format(node_list)) + os.environ['MASTER_PORT'] = os.environ.get('MASTER_PORT', '29500') + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['RANK'] = str(proc_id) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['LOCAL_SIZE'] = str(num_gpus) + args.dist_url = 'env://' + args.world_size = ntasks + args.rank = proc_id + args.gpu = proc_id % num_gpus + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +@torch.no_grad() +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + if target.numel() == 0: + return [torch.zeros([], device=output.device)] + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +# def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): +# # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# """ +# Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# This will eventually be supported natively by PyTorch, and this +# class can go away. +# """ +# if float(torchvision.__version__[:3]) < 0.7: +# if input.numel() > 0: +# return torch.nn.functional.interpolate( +# input, size, scale_factor, mode, align_corners +# ) +# +# output_shape = _output_size(2, input, size, scale_factor) +# output_shape = list(input.shape[:-2]) + list(output_shape) +# if float(torchvision.__version__[:3]) < 0.5: +# return _NewEmptyTensorOp.apply(input, output_shape) +# return _new_empty_tensor(input, output_shape) +# else: +# return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) + + +def get_total_grad_norm(parameters, norm_type=2): + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + device = parameters[0].grad.device + total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), + norm_type) + return total_norm + + +def inverse_sigmoid(x, eps=1e-5): + x = x.clamp(min=0, max=1) + x1 = x.clamp(min=eps) + x2 = (1 - x).clamp(min=eps) + return torch.log(x1/x2) + + + +# class SmoothedValue(object): +# """Track a series of values and provide access to smoothed values over a +# window or the global series average. +# """ +# +# def __init__(self, window_size=20, fmt=None): +# if fmt is None: +# fmt = "{median:.4f} ({global_avg:.4f})" +# self.deque = deque(maxlen=window_size) +# self.total = 0.0 +# self.count = 0 +# self.fmt = fmt +# +# def update(self, value, n=1): +# self.deque.append(value) +# self.count += n +# self.total += value * n +# +# def synchronize_between_processes(self): +# """ +# Warning: does not synchronize the deque! 
+# """ +# if not is_dist_avail_and_initialized(): +# return +# t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') +# dist.barrier() +# dist.all_reduce(t) +# t = t.tolist() +# self.count = int(t[0]) +# self.total = t[1] +# +# @property +# def median(self): +# d = torch.tensor(list(self.deque)) +# return d.median().item() +# +# @property +# def avg(self): +# d = torch.tensor(list(self.deque), dtype=torch.float32) +# return d.mean().item() +# +# @property +# def global_avg(self): +# return self.total / self.count +# +# @property +# def max(self): +# return max(self.deque) +# +# @property +# def value(self): +# return self.deque[-1] +# +# def __str__(self): +# return self.fmt.format( +# median=self.median, +# avg=self.avg, +# global_avg=self.global_avg, +# max=self.max, +# value=self.value) +# +# +# def all_gather(data): +# """ +# Run all_gather on arbitrary picklable data (not necessarily tensors) +# Args: +# data: any picklable object +# Returns: +# list[data]: list of data gathered from each rank +# """ +# world_size = get_world_size() +# if world_size == 1: +# return [data] +# +# # serialized to a Tensor +# buffer = pickle.dumps(data) +# storage = torch.ByteStorage.from_buffer(buffer) +# tensor = torch.ByteTensor(storage).to("cuda") +# +# # obtain Tensor size of each rank +# local_size = torch.tensor([tensor.numel()], device="cuda") +# size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] +# dist.all_gather(size_list, local_size) +# size_list = [int(size.item()) for size in size_list] +# max_size = max(size_list) +# +# # receiving Tensor from all ranks +# # we pad the tensor because torch all_gather does not support +# # gathering tensors of different shapes +# tensor_list = [] +# for _ in size_list: +# tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) +# if local_size != max_size: +# padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") +# tensor = torch.cat((tensor, padding), dim=0) +# dist.all_gather(tensor_list, tensor) +# +# data_list = [] +# for size, tensor in zip(size_list, tensor_list): +# buffer = tensor.cpu().numpy().tobytes()[:size] +# data_list.append(pickle.loads(buffer)) +# +# return data_list +# +# +# def reduce_dict(input_dict, average=True): +# """ +# Args: +# input_dict (dict): all the values will be reduced +# average (bool): whether to do average or sum +# Reduce the values in the dictionary from all processes so that all processes +# have the averaged results. Returns a dict with the same fields as +# input_dict, after reduction. 
+# """ +# world_size = get_world_size() +# if world_size < 2: +# return input_dict +# with torch.no_grad(): +# names = [] +# values = [] +# # sort the keys so that they are consistent across processes +# for k in sorted(input_dict.keys()): +# names.append(k) +# values.append(input_dict[k]) +# values = torch.stack(values, dim=0) +# dist.all_reduce(values) +# if average: +# values /= world_size +# reduced_dict = {k: v for k, v in zip(names, values)} +# return reduced_dict +# +# +# class MetricLogger(object): +# def __init__(self, delimiter="\t"): +# self.meters = defaultdict(SmoothedValue) +# self.delimiter = delimiter +# +# def update(self, **kwargs): +# for k, v in kwargs.items(): +# if isinstance(v, torch.Tensor): +# v = v.item() +# assert isinstance(v, (float, int)) +# self.meters[k].update(v) +# +# def __getattr__(self, attr): +# if attr in self.meters: +# return self.meters[attr] +# if attr in self.__dict__: +# return self.__dict__[attr] +# raise AttributeError("'{}' object has no attribute '{}'".format( +# type(self).__name__, attr)) +# +# def __str__(self): +# loss_str = [] +# for name, meter in self.meters.items(): +# loss_str.append( +# "{}: {}".format(name, str(meter)) +# ) +# return self.delimiter.join(loss_str) +# +# def synchronize_between_processes(self): +# for meter in self.meters.values(): +# meter.synchronize_between_processes() +# +# def add_meter(self, name, meter): +# self.meters[name] = meter +# +# def log_every(self, iterable, print_freq, header=None): +# i = 0 +# if not header: +# header = '' +# start_time = time.time() +# end = time.time() +# iter_time = SmoothedValue(fmt='{avg:.4f}') +# data_time = SmoothedValue(fmt='{avg:.4f}') +# space_fmt = ':' + str(len(str(len(iterable)))) + 'd' +# if torch.cuda.is_available(): +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}', +# 'max mem: {memory:.0f}' +# ]) +# else: +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}' +# ]) +# MB = 1024.0 * 1024.0 +# for obj in iterable: +# data_time.update(time.time() - end) +# yield obj +# iter_time.update(time.time() - end) +# if i % print_freq == 0 or i == len(iterable) - 1: +# eta_seconds = iter_time.global_avg * (len(iterable) - i) +# eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) +# if torch.cuda.is_available(): +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time), +# memory=torch.cuda.max_memory_allocated() / MB)) +# else: +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time))) +# i += 1 +# end = time.time() +# total_time = time.time() - start_time +# total_time_str = str(datetime.timedelta(seconds=int(total_time))) +# print('{} Total time: {} ({:.4f} s / it)'.format( +# header, total_time_str, total_time / len(iterable))) +# +# +# def get_sha(): +# cwd = os.path.dirname(os.path.abspath(__file__)) +# +# def _run(command): +# return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() +# sha = 'N/A' +# diff = "clean" +# branch = 'N/A' +# try: +# sha = _run(['git', 'rev-parse', 'HEAD']) +# subprocess.check_output(['git', 'diff'], cwd=cwd) +# diff = _run(['git', 'diff-index', 'HEAD']) +# diff = "has uncommited changes" if diff else "clean" +# branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) +# except Exception: 
+# pass +# message = f"sha: {sha}, status: {diff}, branch: {branch}" +# return message +# +# # +# # def collate_fn(batch): +# # batch = list(zip(*batch)) +# # batch[0] = nested_tensor_from_tensor_list(batch[0]) +# # return tuple(batch) +# +# +# def _max_by_axis(the_list): +# # type: (List[List[int]]) -> List[int] +# maxes = the_list[0] +# for sublist in the_list[1:]: +# for index, item in enumerate(sublist): +# maxes[index] = max(maxes[index], item) +# return maxes +# +# +# class NestedTensor(object): +# def __init__(self, tensors, mask: Optional[Tensor]): +# self.tensors = tensors +# self.mask = mask +# +# def to(self, device): +# # type: (Device) -> NestedTensor # noqa +# cast_tensor = self.tensors.to(device) +# mask = self.mask +# if mask is not None: +# assert mask is not None +# cast_mask = mask.to(device) +# else: +# cast_mask = None +# return NestedTensor(cast_tensor, cast_mask) +# +# def decompose(self): +# return self.tensors, self.mask +# +# def __repr__(self): +# return str(self.tensors) +# +# # +# # def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): +# # # TODO make this more general +# # if tensor_list[0].ndim == 3: +# # if torchvision._is_tracing(): +# # # nested_tensor_from_tensor_list() does not export well to ONNX +# # # call _onnx_nested_tensor_from_tensor_list() instead +# # return _onnx_nested_tensor_from_tensor_list(tensor_list) +# # +# # # TODO make it support different-sized images +# # max_size = _max_by_axis([list(img.shape) for img in tensor_list]) +# # # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) +# # batch_shape = [len(tensor_list)] + max_size +# # b, c, h, w = batch_shape +# # dtype = tensor_list[0].dtype +# # device = tensor_list[0].device +# # tensor = torch.zeros(batch_shape, dtype=dtype, device=device) +# # mask = torch.ones((b, h, w), dtype=torch.bool, device=device) +# # for img, pad_img, m in zip(tensor_list, tensor, mask): +# # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # m[: img.shape[1], :img.shape[2]] = False +# # else: +# # raise ValueError('not supported') +# # return NestedTensor(tensor, mask) +# +# +# # _onnx_nested_tensor_from_tensor_list() is an implementation of +# # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
+# # @torch.jit.unused +# # def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: +# # max_size = [] +# # for i in range(tensor_list[0].dim()): +# # max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) +# # max_size.append(max_size_i) +# # max_size = tuple(max_size) +# # +# # # work around for +# # # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # # m[: img.shape[1], :img.shape[2]] = False +# # # which is not yet supported in onnx +# # padded_imgs = [] +# # padded_masks = [] +# # for img in tensor_list: +# # padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] +# # padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) +# # padded_imgs.append(padded_img) +# # +# # m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) +# # padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) +# # padded_masks.append(padded_mask.to(torch.bool)) +# # +# # tensor = torch.stack(padded_imgs) +# # mask = torch.stack(padded_masks) +# # +# # return NestedTensor(tensor, mask=mask) +# +# +# def setup_for_distributed(is_master): +# """ +# This function disables printing when not in master process +# """ +# import builtins as __builtin__ +# builtin_print = __builtin__.print +# +# def print(*args, **kwargs): +# force = kwargs.pop('force', False) +# if is_master or force: +# builtin_print(*args, **kwargs) +# +# __builtin__.print = print +# +# +# def is_dist_avail_and_initialized(): +# if not dist.is_available(): +# return False +# if not dist.is_initialized(): +# return False +# return True +# +# +# def get_world_size(): +# if not is_dist_avail_and_initialized(): +# return 1 +# return dist.get_world_size() +# +# +# def get_rank(): +# if not is_dist_avail_and_initialized(): +# return 0 +# return dist.get_rank() +# +# +# def is_main_process(): +# return get_rank() == 0 +# +# +# def save_on_master(*args, **kwargs): +# if is_main_process(): +# torch.save(*args, **kwargs) +# +# +# def init_distributed_mode(args): +# if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: +# args.rank = int(os.environ["RANK"]) +# args.world_size = int(os.environ['WORLD_SIZE']) +# args.gpu = int(os.environ['LOCAL_RANK']) +# elif 'SLURM_PROCID' in os.environ: +# args.rank = int(os.environ['SLURM_PROCID']) +# args.gpu = args.rank % torch.cuda.device_count() +# else: +# print('Not using distributed mode') +# args.distributed = False +# return +# +# args.distributed = True +# +# torch.cuda.set_device(args.gpu) +# args.dist_backend = 'nccl' +# print('| distributed init (rank {}): {}'.format( +# args.rank, args.dist_url), flush=True) +# torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, +# world_size=args.world_size, rank=args.rank) +# torch.distributed.barrier() +# setup_for_distributed(args.rank == 0) +# +# +# @torch.no_grad() +# def accuracy(output, target, topk=(1,)): +# """Computes the precision@k for the specified values of k""" +# if target.numel() == 0: +# return [torch.zeros([], device=output.device)] +# maxk = max(topk) +# batch_size = target.size(0) +# +# _, pred = output.topk(maxk, 1, True, True) +# pred = pred.t() +# correct = pred.eq(target.view(1, -1).expand_as(pred)) +# +# res = [] +# for k in topk: +# correct_k = correct[:k].view(-1).float().sum(0) +# res.append(correct_k.mul_(100.0 / batch_size)) +# return res +# +# +# # def interpolate(input, size=None, scale_factor=None, mode="nearest", 
align_corners=None): +# # # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# # """ +# # Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# # This will eventually be supported natively by PyTorch, and this +# # class can go away. +# # """ +# # if float(torchvision.__version__[:3]) < 0.7: +# # if input.numel() > 0: +# # return torch.nn.functional.interpolate( +# # input, size, scale_factor, mode, align_corners +# # ) +# # +# # output_shape = _output_size(2, input, size, scale_factor) +# # output_shape = list(input.shape[:-2]) + list(output_shape) +# # return _new_empty_tensor(input, output_shape) +# # else: +# # return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) diff --git a/anet_clip/backup/misc/utils.py b/anet_clip/backup/misc/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b3e979477f4d1a97c28daed7f5592ea6a0a59716 --- /dev/null +++ b/anet_clip/backup/misc/utils.py @@ -0,0 +1,352 @@ +# coding:utf-8 +# from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import torch +import numpy as np +import glob +import shutil +import os +import colorlog +import random +import six +from six.moves import cPickle +import matplotlib as mpl + +mpl.use('Agg') +import matplotlib.pyplot as plt + + +def match_name_keywords(n, name_keywords): + out = False + for b in name_keywords: + if b in n: + out = True + break + return out + + +def decide_two_stage(transformer_input_type, dt, criterion): + if transformer_input_type == 'gt_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = dt['gt_boxes_mask'] + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = True + elif transformer_input_type == 'prior_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = None + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = False + elif transformer_input_type == 'queries': # + two_stage = False + proposals = None + proposals_mask = None + disable_iterative_refine = False + else: + raise ValueError('Wrong value of transformer_input_type, got {}'.format(transformer_input_type)) + return two_stage, disable_iterative_refine, proposals, proposals_mask + + +def pickle_load(f): + """ Load a pickle. + Parameters + ---------- + f: file-like object + """ + if six.PY3: + return cPickle.load(f, encoding='latin-1') + else: + return cPickle.load(f) + + +def pickle_dump(obj, f): + """ Dump a pickle. + Parameters + ---------- + obj: pickled object + f: file-like object + """ + if six.PY3: + return cPickle.dump(obj, f, protocol=2) + else: + return cPickle.dump(obj, f) + + +def set_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # grid_sampler_2d_backward_cuda does not have a deterministic implementation. 
try setting torch.use_deterministic_algorithms(True, warn_only=True) to surface the non-deterministic operation + # torch.use_deterministic_algorithms(True, warn_only=True) + + +def update_values(dict_from, dict_to): + for key, value in dict_from.items(): + if key not in dict_to.keys(): + raise AssertionError('key mismatching: {}'.format(key)) + if isinstance(value, dict): + update_values(dict_from[key], dict_to[key]) + elif value is not None: + dict_to[key] = dict_from[key] + + +def print_opt(opt, model, logger): + print_alert_message('All args:', logger) + for key, item in opt._get_kwargs(): + logger.info('{} = {}'.format(key, item)) + print_alert_message('Model structure:', logger) + logger.info(model) + + +def build_folder_name(opt): + # The dataset + # breakpoint() + if len(opt.visual_feature_folder) == 2: + if ('youcook2' in opt.visual_feature_folder[1]) or ('yc2' in opt.visual_feature_folder[1]): + dataset_name = 'howto-yc2_yc2' + elif ('Tasty' in opt.visual_feature_folder[1]) or ('tasty' in opt.visual_feature_folder[1]): + dataset_name = 'howto-tasty_tasty' + elif ('anet' in opt.visual_feature_folder[1]) or ('Anet' in opt.visual_feature_folder[1]): + dataset_name = 'howto-anet_anet' + # elif ('vlep' in opt.visual_feature_folder[1]) or ('Vlep' in opt.visual_feature_folder[1]): + # dataset_name = 'howto-vlep_vlep' + else: + raise ValueError('Wrong dataset name') + + if 'vlep' in opt.visual_feature_folder[0] or 'Vlep' in opt.visual_feature_folder[0]: + dataset_name = dataset_name.replace('howto', 'vlep') + else: + if ('youcook2' in opt.visual_feature_folder[0]) or ('yc2' in opt.visual_feature_folder[0]): + dataset_name = 'yc2' + elif ('Anet' in opt.visual_feature_folder[0]) or ('anet' in opt.visual_feature_folder[0]): + dataset_name = 'anet' + elif ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + dataset_name = 'tasty' + elif ('Howto' in opt.visual_feature_folder[0]) or ('howto' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'howto_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_anet' + elif ('vlep' in opt.visual_feature_folder[0]) or ('Vlep' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'vlep_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_anet' + else: + raise ValueError('Wrong dataset name') + if 'tasty_14' in opt.dict_file: + dataset_name += '_voc14' + + # The code base + if opt.use_anchor: + use_anchor = 'anc' # Means learnable anchor is used + else: + use_anchor = 'ori' # Means original anchor in pdvc is used + + # The state of using pseudo boxes + if opt.use_pseudo_box: + use_pseudo = 'pbox' + if opt.pseudo_box_type == 'similarity': + use_pseudo += '(sim)' + else: + use_pseudo += '({})'.format(opt.pseudo_box_type) + else: + use_pseudo = 'GT' + + # The visual-text model used + if opt.pretrained_language_model == 'CLIP-ViP': + text_model = 'ViP' + elif opt.pretrained_language_model == 'UniVL': + text_model = 'Uni' + else: +
text_model = opt.pretrained_language_model + + format_folder_name = '_'.join([dataset_name, use_anchor, use_pseudo, text_model]) + + + + return format_folder_name + +def build_folder(opt): + # breakpoint() + if opt.start_from: + print('Start training from id:{}'.format(opt.start_from)) + save_folder = os.path.join(opt.save_dir, opt.start_from) + assert os.path.exists(save_folder) and os.path.isdir(save_folder), 'Wrong start_from path: {}'.format(save_folder) + else: + if not os.path.exists(opt.save_dir): + os.mkdir(opt.save_dir) + format_folder_name = build_folder_name(opt) + # breakpoint() + save_foldername = '' + if opt.use_pseudo_box: + if opt.pseudo_box_type != 'align': + if opt.pseudo_box_type == 'similarity_op' or opt.pseudo_box_type == 'similarity_op_order': + save_foldername = '{}_topf{}_beta{}_iter{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.beta, opt.iteration, opt.width_ratio) + elif opt.pseudo_box_type == 'similarity_op_order_v2': + save_foldername = '{}_topf{}_iter{}_r{}_th{}'.format(opt.pseudo_box_type, opt.top_frames, opt.iteration, opt.width_ratio, opt.width_th) + else: + save_foldername = '{}_topf{}_w{}_{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.window_size, opt.statistic_mode, opt.width_ratio) + else: + save_foldername = 'align' + else: + save_foldername = 'gtbox' + + if opt.refine_pseudo_box: + save_foldername += '_refine_aug({},{})_top{}_{}stage'.format(opt.pseudo_box_aug_num, \ + opt.pseudo_box_aug_ratio, \ + opt.merge_k_boxes, \ + opt.refine_pseudo_stage_num) + if opt.pseudo_box_aug_mode == 'uniform': + save_foldername += '_uniform' + elif opt.pseudo_box_aug_mode == 'random_new': + save_foldername += '_random_new' + save_foldername += ('_' + opt.merge_criterion) + if opt.merge_mode == 'interpolate': + save_foldername += '_interpolate' + if opt.use_neg_pseudo_box: + save_foldername += '_{}neg'.format(opt.num_neg_box) + if opt.mil_loss_coef != 1.0: + save_foldername += '_mil_coef{}'.format(str(opt.mil_loss_coef)) + if opt.weighted_mil_loss: + save_foldername += '_wMIL' + if not opt.focal_mil: + save_foldername += '_noFocal' + if opt.disable_rematch: + save_foldername += '_nomatch' + if opt.use_additional_score_layer: + save_foldername += '_S-layer' + if opt.use_additional_cap_layer: + save_foldername += '_C-layer' + if 'puyu' in opt.train_caption_file[0]: + save_foldername += '_puyu' + elif 'mix' in opt.train_caption_file[0]: + save_foldername += '_mixlm' + + if opt.id != '': + save_foldername += '_{}'.format(opt.id) + # breakpoint() + # basefilename = os.path.basename(opt.cfg_path) + # basefilename = os.path.splitext(basefilename)[0] + save_folder = os.path.join(opt.save_dir, format_folder_name) + save_folder = os.path.join(save_folder, save_foldername) + if os.path.exists(save_folder): + print('Results folder "{}" already exists, renaming it...'.format(save_folder)) + i = 1 + while True: + new_save_folder = save_folder + '_{}'.format(i) + if not os.path.exists(new_save_folder): + save_folder = new_save_folder + break + i += 1 + # wait_flag = input('Warning! Path {} already exists, rename it? (Y/N) : '.format(save_folder)) + # if wait_flag in ['Y', 'y']: + # # opt.id = opt.id + '_{}'.format(time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) + # # save_folder = os.path.join(opt.save_dir, opt.id) + # # print('Rename opt.id as "{}".'.format(opt.id)) + # new_name = input('the new name to be appended :') + # save_folder = save_folder + '_' + new_name + # # elif wait_flag in ['N', 'n']: + # # wait_flag_new = input('Are you sure re-write this folder:{}?
(Y/N): '.format(save_folder)) + # # if wait_flag_new in ['Y', 'y']: + # # return save_folder + # # else: + # # raise AssertionError('Folder {} already exists'.format(save_folder)) + # else: + # raise AssertionError('Folder {} already exists'.format(save_folder)) + print('Results folder "{}" does not exist, creating folder...'.format(save_folder)) + os.makedirs(save_folder) + os.makedirs(os.path.join(save_folder, 'prediction')) + return save_folder + + +def backup_envir(save_folder): + backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc'] + backup_files = glob.glob('./*.py') + for folder in backup_folders: + shutil.copytree(folder, os.path.join(save_folder, 'backup', folder)) + for file in backup_files: + shutil.copyfile(file, os.path.join(save_folder, 'backup', file)) + + +def create_logger(folder, filename): + log_colors = { + 'DEBUG': 'blue', + 'INFO': 'white', + 'WARNING': 'green', + 'ERROR': 'red', + 'CRITICAL': 'yellow', + } + + import logging + logger = logging.getLogger('DVC') + # %(filename)s$RESET:%(lineno)d + # LOGFORMAT = "%(log_color)s%(asctime)s [%(log_color)s%(filename)s:%(lineno)d] | %(log_color)s%(message)s%(reset)s |" + LOGFORMAT = "" + LOG_LEVEL = logging.DEBUG + logging.root.setLevel(LOG_LEVEL) + stream = logging.StreamHandler() + stream.setLevel(LOG_LEVEL) + stream.setFormatter(colorlog.ColoredFormatter(LOGFORMAT, datefmt='%d %H:%M', log_colors=log_colors)) + + # print to log file + hdlr = logging.FileHandler(os.path.join(folder, filename)) + hdlr.setLevel(LOG_LEVEL) + # hdlr.setFormatter(logging.Formatter("[%(asctime)s] %(message)s")) + hdlr.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(hdlr) + logger.addHandler(stream) + return logger + + +def print_alert_message(str, logger=None): + msg = '*' * 20 + ' ' + str + ' ' + '*' * (58 - len(str)) + if logger: + logger.info('\n\n' + msg) + else: + print(msg) + + +def set_lr(optimizer, lr): + for group in optimizer.param_groups: + group['lr'] = lr + + +def clip_gradient(optimizer, grad_clip): + for group in optimizer.param_groups: + for i, param in enumerate(group['params']): + if param.grad is not None: + param.grad.data.clamp_(-grad_clip, grad_clip) + + +if __name__ == '__main__': + # import opts + # + # info = {'opt': vars(opts.parse_opts()), + # 'loss': {'tap_loss': 0, 'tap_reg_loss': 0, 'tap_conf_loss': 0, 'lm_loss': 0}} + # record_this_run_to_csv(info, 'save/results_all_runs.csv') + + logger = create_logger('./', 'mylogger.log') + logger.info('debug') + logger.info('test2') diff --git a/anet_clip/backup/opts.py b/anet_clip/backup/opts.py new file mode 100644 index 0000000000000000000000000000000000000000..e2edf8fa4918e9b960cd26d0fa561d3b1155b4ff --- /dev/null +++ b/anet_clip/backup/opts.py @@ -0,0 +1,311 @@ +import argparse +import time +import yaml +import os +import numpy as np + +def parse_opts(): + parser = argparse.ArgumentParser() + + # configure of this run + parser.add_argument('--cfg_path', type=str, required=True, help='config file') + parser.add_argument('--id', type=str, default='', help='id of this run. 
Results and logs will be saved in this folder ./save/id') + parser.add_argument('--gpu_id', type=str, nargs='+', default=[]) + parser.add_argument('--disable_tqdm', action='store_true') + parser.add_argument('--seed', type=int, default=777) + parser.add_argument('--random_seed', action='store_true', help='choose a random seed from {1,...,1000}') + parser.add_argument('--disable_cudnn', type=int, default=0, help='disabling cudnn may solve some unknown bugs') + parser.add_argument('--debug', action='store_true', help='using mini-dataset for fast debugging') + parser.add_argument('--device', default='cuda', choices=['cpu', 'cuda'], help='device to use for training / testing') + parser.add_argument('--map', action='store_true', default=False, help='map a100 data path to 3090 data path') + # parser.add_argument('--extra_id', type=str, default='', help='extra config to be listed in the folder name') + + # ***************************** INPUT DATA PATH ***************************** + parser.add_argument('--train_caption_file', type=str, + default='data/anet/captiondata/train_modified.json', help='') + parser.add_argument('--invalid_video_json', type=str, nargs='+', default=[]) + parser.add_argument('--val_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--visual_feature_folder', type=str, default='data/anet/resnet_bn') + parser.add_argument('--text_feature_folder', type=str, default=None) + parser.add_argument('--gt_file_for_auc', type=str, nargs='+', default='data/anet/captiondata/val_all.json') + parser.add_argument('--gt_file_for_eval', type=str, nargs='+', default=['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json']) + parser.add_argument('--gt_file_for_para_eval', type=str, nargs='+', default=['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json']) + parser.add_argument('--dict_file', type=str, default='data/anet/vocabulary_activitynet.json', help='') + parser.add_argument('--criteria_for_best_ckpt', type=str, default='overall', choices=['dvc', 'pc', 'overall'], help='for dense video captioning, use soda_c + METEOR as the criteria; ' 'for paragraph captioning, choose the best para_METEOR+para_CIDEr+para_BLEU4; ' 'for overall, select BLEU4 + METEOR + F1_score') + + parser.add_argument('--visual_feature_type', type=str, default='c3d', choices=['c3d', 'resnet_bn', 'resnet', 'UniVL', 'CLIP', 'CLIP-ViP']) + parser.add_argument('--feature_dim', type=int, default=500, help='dim of frame-level feature vector') + + parser.add_argument('--start_from', type=str, default='', help='id of the run with incomplete training') + parser.add_argument('--start_from_mode', type=str, choices=['best', 'last'], default="last") + parser.add_argument('--pretrain', type=str, choices=['full', 'encoder', 'decoder']) + parser.add_argument('--pretrain_path', type=str, default='', help='path of .pth') + + # ***************************** DATALOADER OPTION ***************************** + parser.add_argument('--nthreads', type=int, default=4) + parser.add_argument('--data_norm', type=int, default=0) + parser.add_argument('--data_rescale', type=int, default=1) + + parser.add_argument('--feature_sample_rate', type=int, default=1) + parser.add_argument('--train_proposal_sample_num', type=int, + default=24, + help='number of sampled proposals (or proposal sequence), a bigger value may be better') + parser.add_argument('--gt_proposal_sample_num', type=int, default=30) + parser.add_argument('--ft_gt_percent',
type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + parser.add_argument('--pre_percent', type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + + + # ***************************** Caption Decoder ***************************** + parser.add_argument('--vocab_size', type=int, default=5747) + parser.add_argument('--wordRNN_input_feats_type', type=str, default='C', choices=['C', 'E', 'C+E'], + help='C:clip-level features, E: event-level features, C+E: both') + parser.add_argument('--caption_decoder_type', type=str, default="light", + choices=['none','light', 'standard']) + parser.add_argument('--rnn_size', type=int, default=512, + help='size of the rnn in number of hidden nodes in each layer') + parser.add_argument('--num_layers', type=int, default=1, help='number of layers in the RNN') + parser.add_argument('--input_encoding_size', type=int, default=512, + help='the encoding size of each token in the vocabulary') + parser.add_argument('--att_hid_size', type=int, default=512, help='the hidden size of the attention MLP') + parser.add_argument('--drop_prob', type=float, default=0.5, help='strength of dropout in the Language Model RNN') + parser.add_argument('--max_caption_len', type=int, default=30, help='') + + # ***************************** Transformer ***************************** + parser.add_argument('--hidden_dim', type=int, default=512) + parser.add_argument('--num_queries', type=int, default=100) + parser.add_argument('--hidden_dropout_prob', type=float, default=0.5) + parser.add_argument('--layer_norm_eps', type=float, default=1e-12) + parser.add_argument('--caption_cost_type', type=str, default='loss') + parser.add_argument('--set_cost_caption', type=float, default=0) + parser.add_argument('--set_cost_class', type=float, default=1) + parser.add_argument('--set_cost_bbox', type=float, default=5) + parser.add_argument('--set_cost_giou', type=float, default=2) + parser.add_argument('--cost_alpha', type=float, default=0.25) + parser.add_argument('--cost_gamma', type=float, default=2) + + parser.add_argument('--bbox_loss_coef', default=5, type=float) + parser.add_argument('--giou_loss_coef', default=2, type=float) + parser.add_argument('--count_loss_coef', default=0, type=float) + parser.add_argument('--caption_loss_coef', default=0, type=float) + parser.add_argument('--eos_coef', default=0.1, type=float, + help="Relative classification weight of the no-object class") + parser.add_argument('--num_classes', type=int, default=1) + parser.add_argument('--dec_layers', type=int, default=6) + parser.add_argument('--enc_layers', type=int, default=6) + parser.add_argument('--transformer_ff_dim', type=int, default=2048) + parser.add_argument('--transformer_dropout_prob', type=float, default=0.1) + parser.add_argument('--frame_embedding_num', type=int, default = 100) + parser.add_argument('--sample_method', type=str, default = 'nearest', choices=['nearest', 'linear']) + parser.add_argument('--fix_xcw', type=int, default=0) + + # ***************************** Learnable anchor ***************************** + parser.add_argument('--use_anchor', default=False, action='store_true') + parser.add_argument('--random_anchor_init', default=True, action='store_false') + parser.add_argument('--prior_anchor_duration_init', default=True, action='store_false') + + # ***************************** Text-query alignment ***************************** + 
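(Aside on the matching-cost options configured above: set_cost_class, set_cost_bbox and set_cost_giou are the usual DETR-style Hungarian-matching weights. A pairwise query-to-event cost matrix is built from a classification term, an L1 term on the (center, width) segment encoding, and a temporal gIoU term, then solved with scipy's linear_sum_assignment; cost_alpha / cost_gamma parameterize the focal-style variant of the classification term. Below is a minimal 1-D sketch under those assumptions; the function names are illustrative, not the repository's matcher, and the class cost is simplified to -p.)

    import torch
    from scipy.optimize import linear_sum_assignment

    def cw_to_se(cw):
        # (center, width) -> (start, end) for 1-D temporal segments
        return torch.stack([cw[:, 0] - cw[:, 1] / 2, cw[:, 0] + cw[:, 1] / 2], dim=-1)

    def pairwise_giou_1d(a, b, eps=1e-6):
        # a: [N, 2], b: [M, 2] as (start, end); returns pairwise gIoU in [-1, 1], shape [N, M]
        s1, e1 = a[:, None, 0], a[:, None, 1]
        s2, e2 = b[None, :, 0], b[None, :, 1]
        inter = (torch.min(e1, e2) - torch.max(s1, s2)).clamp(min=0)
        union = (e1 - s1) + (e2 - s2) - inter
        enclose = torch.max(e1, e2) - torch.min(s1, s2)
        return inter / (union + eps) - (enclose - union) / (enclose + eps)

    def hungarian_match(fg_prob, pred_cw, tgt_cw, w_cls=1.0, w_l1=5.0, w_giou=2.0):
        # fg_prob: [num_queries] foreground probabilities; *_cw: [*, 2] (center, width) segments
        cost_cls = -fg_prob[:, None].expand(-1, tgt_cw.shape[0])      # simple -p class cost
        cost_l1 = torch.cdist(pred_cw, tgt_cw, p=1)                   # L1 on (center, width)
        cost_giou = -pairwise_giou_1d(cw_to_se(pred_cw), cw_to_se(tgt_cw))
        C = w_cls * cost_cls + w_l1 * cost_l1 + w_giou * cost_giou
        return linear_sum_assignment(C.detach().cpu().numpy())        # (query_idx, event_idx)

The default weights above mirror the flag defaults (set_cost_class=1, set_cost_bbox=5, set_cost_giou=2).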
parser.add_argument('--matcher_type', type=str, default='default', choices=['default', 'DTW', 'Sim']) + # === For Text encoder === + parser.add_argument('--pretrained_language_model', type=str, default='UniVL', \ + choices=['UniVL', 'CLIP', 'CLIP-ViP'], help='Pretrained Hugging Face model') + parser.add_argument('--text_hidden_dim', type=int, default=768, help='hidden dim of text encoder') + parser.add_argument('--max_text_input_len', type=int, default=32, help='') + parser.add_argument('--max_pos_num', type=int, default=500) + parser.add_argument('--huggingface_cache_dir', type=str, default='.cache') + parser.add_argument('--text_encoder_learning_strategy', type=str, default='frozen', choices=['frozen']) + + # === For generate_pseudo_bbox === + parser.add_argument('--use_pseudo_box', default=False, action='store_true') + parser.add_argument('--pseudo_box_type', type=str, default='similarity', choices=['align', 'similarity', 'similarity_op', 'similarity_op_order', 'similarity_op_order_v2', 'weight_sim', 'weight_index', 'modeframe']) + + # 1) For different ways of generating pseudo box + parser.add_argument('--top_frames', type=int, default=15) + parser.add_argument('--window_size', type=int, default=2) + parser.add_argument('--statistic_mode', type=str, default='median', choices=['mode', 'median']) + parser.add_argument('--width_ratio', type=float, default=-1) + parser.add_argument('--beta', type=float, default=1, help="weight for overlap loss") + parser.add_argument('--width_th', type=float, default=0.5, help="threshold for width") + parser.add_argument('--iteration', type=int, default=3, help="iteration for pseudo box generation") + # 2) For box refinement + parser.add_argument('--pseudo_box_aug', default=False, action='store_true') + parser.add_argument('--pseudo_box_aug_num', type=int, default=5) + parser.add_argument('--pseudo_box_aug_ratio', type=float, default=0.1) + parser.add_argument('--pseudo_box_aug_mode', default='random', choices=['random', 'uniform', 'random_new']) + parser.add_argument('--refine_pseudo_box', default=False, action='store_true') + parser.add_argument('--use_additional_score_layer', default=False, action='store_true') + parser.add_argument('--use_additional_cap_layer', default=False, action='store_true') + parser.add_argument('--merge_k_boxes', type=int, default=3) + parser.add_argument('--merge_criterion', type=str, choices=['cap_topk', 'ins_topk', 'ins_cap_topk'], default='cap_topk') + parser.add_argument('--merge_mode', type=str, choices=['weighted_sum', 'interpolate'], default='weighted_sum') + parser.add_argument('--refine_pseudo_stage_num', type=int, default=2) + parser.add_argument('--use_query_box_for_refine', default=False, action='store_true') + parser.add_argument('--norm_ins_score', default='sigmoid', choices=['sigmoid', 'softmax']) + parser.add_argument('--cap_prob_clip', default=False, action='store_true') + parser.add_argument('--use_neg_pseudo_box', default=False, action='store_true') + parser.add_argument('--num_neg_box', default=10, type=int) + parser.add_argument('--weighted_mil_loss', default=False, action='store_true') + parser.add_argument('--focal_mil', default=False, action='store_true') + parser.add_argument('--disable_rematch', default=False, action='store_true') + parser.add_argument('--start_refine_epoch', default=-1, type=int) + + + # === For DTW === + parser.add_argument('--align_keep_percentile', type=float, default=0.1) + parser.add_argument('--align_top_band_size', type=int, default=0) + parser.add_argument('--align_drop_z', type=int, default=0) + parser.add_argument('--align_one_to_many', default=False,
action='store_true') + parser.add_argument('--align_many_to_one', default=False, action='store_true') + parser.add_argument('--align_contiguous', default=False, action='store_true') + + # === For Sim matcher + parser.add_argument('--set_cost_sim', type=float, default=1.0) + + # === For contrastive === + parser.add_argument('--enable_contrastive', default=False, action='store_true', help='enable contrastive learning') + parser.add_argument('--disable_contrastive_projection', default=False, action='store_true', help='disable contrastive projection layers') + parser.add_argument('--contrastive_hidden_size', type=int, default=128, help='Contrastive hidden size') + parser.add_argument('--contrastive_loss_start_coef', type=float, default=0.1, help='Weight of contrastive loss') + parser.add_argument('--contrastive_loss_temperature', type=float, default=0.1, help='Temperature of cl temperature') + parser.add_argument('--enable_cross_video_cl', type=bool, default=True, help='Enable cross video contrastive loss') + parser.add_argument('--enable_e2t_cl', default=True, action='store_true', help=' enable event-to-text contrastive') + parser.add_argument('--enable_bg_for_cl', default=True, action='store_true', help=' add a class for background events') + parser.add_argument('--set_cost_cl', type=float, default=0.0) + parser.add_argument('--cl_schedule_val', type=float, nargs='+', default=[0, 0.1]) + parser.add_argument('--cl_schedule_time', type=int, nargs='+', default=[0, 2]) + + + + # ***************************** Prior ***************************** + parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all']) + + # ***************************** OPTIMIZER ***************************** + parser.add_argument('--training_scheme', type=str, default='all', choices=['cap_head_only', 'no_cap_head', 'all']) + parser.add_argument('--epoch', type=int, default=25) + parser.add_argument('--batch_size', type=int, default=1, help='batch_size') + parser.add_argument('--batch_size_for_eval', type=int, default=1, help='') + parser.add_argument('--grad_clip', type=float, default=100., help='clip gradients at this value') + parser.add_argument('--optimizer_type', type=str, default='adam') + parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay') + + parser.add_argument('--lr', type=float, default=1e-4, help='1e-4 for resnet feature and 5e-5 for C3D feature') + parser.add_argument('--learning_rate_decay_start', type=float, default=8) + parser.add_argument('--learning_rate_decay_every', type=float, default=3) + parser.add_argument('--learning_rate_decay_rate', type=float, default=0.5) + + # ***************************** SAVING AND LOGGING ***************************** + parser.add_argument('--min_epoch_when_save', type=int, default=-1) + parser.add_argument('--save_checkpoint_every', type=int, default=1) + parser.add_argument('--save_all_checkpoint', action='store_true') + parser.add_argument('--save_dir', type=str, default='/mnt/data/pjlab-3090-sport/wuhao/logs/dibs', help='directory to store checkpointed models') + + # ***************************** For Deformable DETR ************************************* + parser.add_argument('--lr_backbone_names', default=["None"], type=str, nargs='+') + parser.add_argument('--lr_backbone', default=2e-5, type=float) + parser.add_argument('--lr_proj', default=0, type=int) + parser.add_argument('--lr_linear_proj_names', default=['reference_points', 'sampling_offsets'], type=str, nargs='+') + 
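The lr_backbone_names / lr_linear_proj_names lists just above are the usual Deformable-DETR hooks for training selected parameters at a scaled learning rate; paired with match_name_keywords from misc/utils.py earlier in this diff, they drive optimizer param-group construction. A hedged sketch of that pattern (the grouping in the actual training script may differ; model and args are assumed to be in scope):

    import torch
    from misc.utils import match_name_keywords  # defined earlier in this diff

    def build_param_groups(model, args):
        proj, rest = [], []
        for n, p in model.named_parameters():
            if not p.requires_grad:
                continue
            # e.g. 'reference_points' / 'sampling_offsets' go to the scaled-LR group
            (proj if match_name_keywords(n, args.lr_linear_proj_names) else rest).append(p)
        return [
            {'params': rest, 'lr': args.lr},
            {'params': proj, 'lr': args.lr * args.lr_linear_proj_mult},
        ]

    optimizer = torch.optim.AdamW(build_param_groups(model, args), weight_decay=args.weight_decay)

The reduced rate (lr_linear_proj_mult defaults to 0.1 below) reflects how sensitive the deformable-attention sampling offsets and reference points are to large updates.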
parser.add_argument('--lr_linear_proj_mult', default=0.1, type=float) + + # Variants of Deformable DETR + parser.add_argument('--with_box_refine', default=False, action='store_true') + parser.add_argument('--transformer_input_type', default='queries', choices=['gt_proposals', 'prior_proposals', 'learnt_proposals', 'queries']) + + # * Backbone + parser.add_argument('--backbone', default=None, type=str, + help="Name of the convolutional backbone to use") + parser.add_argument('--dilation', action='store_true', + help="If true, we replace stride with dilation in the last convolutional block (DC5)") + parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'), + help="Type of positional embedding to use on top of the image features") + parser.add_argument('--position_embedding_scale', default=2 * np.pi, type=float, + help="position / size * scale") + parser.add_argument('--num_feature_levels', default=4, type=int, help='number of feature levels') + + # * Transformer + + parser.add_argument('--nheads', default=8, type=int, + help="Number of attention heads inside the transformer's attentions") + parser.add_argument('--dec_n_points', default=4, type=int) + parser.add_argument('--enc_n_points', default=4, type=int) + + parser.add_argument('--share_caption_head', type = int ,default=1) + + parser.add_argument('--cap_nheads', default=8, type=int) + parser.add_argument('--cap_dec_n_points', default=4, type=int) + parser.add_argument('--cap_num_feature_levels', default=4, type=int) + parser.add_argument('--disable_mid_caption_heads', action='store_true') + + # Loss + parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false', + help="Disables auxiliary decoding losses (loss at each layer)") + + + # * Loss coefficients + + parser.add_argument('--cls_loss_coef', default=2, type=float) + parser.add_argument('--self_iou_loss_coef', default=0.0, type=float) + parser.add_argument('--ref_rank_loss_coef', default=0.1, type=float) + parser.add_argument('--mil_loss_coef', default=1.0, type=float) + parser.add_argument('--focal_alpha', default=0.25, type=float) + parser.add_argument('--focal_gamma', default=2., type=float) + + + #***************************** Event counter ***************************** + parser.add_argument('--max_eseq_length', default=10, type=int) + parser.add_argument('--lloss_gau_mask', default=1, type=int) + parser.add_argument('--lloss_beta', default=1, type=float) + + # scheduled sampling + parser.add_argument('--scheduled_sampling_start', type=int, default=-1, + help='at what iteration to start decay gt probability') + parser.add_argument('--basic_ss_prob', type=float, default=0, help='initial ss prob') + parser.add_argument('--scheduled_sampling_increase_every', type=int, default=2, + help='every how many iterations thereafter to gt probability') + parser.add_argument('--scheduled_sampling_increase_prob', type=float, default=0.05, + help='How much to update the prob') + parser.add_argument('--scheduled_sampling_max_prob', type=float, default=0.25, + help='Maximum scheduled sampling prob.') + + # reranking + parser.add_argument('--ec_alpha', type=float, default=0.3) + args = parser.parse_args() + + if args.cfg_path: + import_cfg(args.cfg_path, vars(args)) + + if args.random_seed: + import random + seed = int(random.random() * 1000) + new_id = args.id + '_seed{}'.format(seed) + save_folder = os.path.join(args.save_dir, new_id) + while os.path.exists(save_folder): + seed = int(random.random() * 1000) + new_id = args.id + 
'_seed{}'.format(seed) + save_folder = os.path.join(args.save_dir, new_id) + args.id = new_id + args.seed = seed + + if args.debug: + args.id = 'debug_' + time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()) + args.save_checkpoint_every = 1 + args.shuffle = 0 + + if args.caption_decoder_type == 'none': + assert args.caption_loss_coef == 0 + assert args.set_cost_caption == 0 + + print("args.id: {}".format(args.id)) + return args + +def import_cfg(cfg_path, args): + with open(cfg_path, 'r') as handle: + yml = yaml.load(handle, Loader=yaml.FullLoader) + if 'base_cfg_path' in yml: + base_cfg_path = yml['base_cfg_path'] + import_cfg(base_cfg_path, args) + args.update(yml) + pass +if __name__ == '__main__': + opt = parse_opts() + print(opt) \ No newline at end of file diff --git a/anet_clip/backup/pdvc/CaptioningHead/LSTM.py b/anet_clip/backup/pdvc/CaptioningHead/LSTM.py new file mode 100644 index 0000000000000000000000000000000000000000..4b44fae2e15520e0c09c298d233e686c9b45d36e --- /dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/LSTM.py @@ -0,0 +1,174 @@ +# This file contains ShowAttendTell and AllImg model + +# ShowAttendTell is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention +# https://arxiv.org/abs/1502.03044 + +# AllImg is a model where +# img feature is concatenated with word embedding at every time step as the input of lstm +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb + +import numpy +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import * + +class Captioner(nn.Module): + def __init__(self, opt): + super(Captioner, self).__init__() + self.opt = opt + + self.vocab_size = opt.vocab_size + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.max_caption_len = opt.max_caption_len + + self.ss_prob = 0.0 # Schedule sampling probability + self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size) + + self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1) + self.dropout = nn.Dropout(self.drop_prob_lm) + + self.init_weights() + + def init_weights(self): + initrange = 0.1 + self.embed.weight.data.uniform_(-initrange, initrange) + self.logit.bias.data.fill_(0) + self.logit.weight.data.uniform_(-initrange, initrange) + + def init_hidden(self, batch_size): + weight = next(self.parameters()).data + return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(), + weight.new(self.num_layers, batch_size, self.rnn_size).zero_()) # (h0, c0) + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + max_len = input.shape[1] + output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output + + def forward(self, event, clip, clip_mask, seq): + batch_size = clip.shape[0] + + state = self.init_hidden(batch_size) + outputs = [] + seq = seq.long() + + for i in range(seq.size(1) - 1): + if self.training and i >= 1 and self.ss_prob > 0.0: # otherwiste no need to sample + sample_prob = clip.data.new(batch_size).uniform_(0, 1) + sample_mask = sample_prob < self.ss_prob + if sample_mask.sum() == 0: + it = seq[:, i].clone() + else: + sample_ind = sample_mask.nonzero().view(-1) + it = seq[:, i].data.clone() + prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1) + it.index_copy_(0, 
sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind)) + it = Variable(it, requires_grad=False) + else: + it = seq[:, i].clone() + # break if all the sequences end + if i >= 1 and seq[:, i].data.sum() == 0: + break + + output, state = self.get_logprobs_state(it, event, clip, clip_mask, state) + outputs.append(output) + + return torch.cat([_.unsqueeze(1) for _ in outputs], 1) + + + def get_logprobs_state(self, it, event , clip, clip_mask, state): + xt = self.embed(it) + output, state = self.core(xt, event , clip, clip_mask, state) + logprobs = F.log_softmax(self.logit(self.dropout(output)), dim=1) + return logprobs, state + + def sample(self, event , clip, clip_mask, opt={}): + + sample_max = opt.get('sample_max', 1) + beam_size = opt.get('beam_size', 1) + temperature = opt.get('temperature', 1.0) + + batch_size = clip.shape[0] + + state = self.init_hidden(batch_size) + + seq = [] + seqLogprobs = [] + + for t in range(self.max_caption_len + 1): + if t == 0: # input + it = clip.data.new(batch_size).long().zero_() + elif sample_max: + sampleLogprobs, it = torch.max(logprobs.data, 1) + it = it.view(-1).long() + else: + if temperature == 1.0: + prob_prev = torch.exp(logprobs.data) # fetch prev distribution: shape Nx(M+1) + else: + # scale logprobs by temperature + prob_prev = torch.exp(torch.div(logprobs.data, temperature)) + it = torch.multinomial(prob_prev, 1) + sampleLogprobs = logprobs.gather(1, it) # gather the logprobs at sampled positions + it = it.view(-1).long() # and flatten indices for downstream processing + + logprobs, state = self.get_logprobs_state(it, event , clip, clip_mask, state) + + if t >= 1: + # stop when all finished + if t == 1: + unfinished = it > 0 + else: + unfinished = unfinished & (it > 0) + if unfinished.sum() == 0: + break + it = it * unfinished.type_as(it) + seq.append(it) #seq[t] the input of t+2 time step + seqLogprobs.append(sampleLogprobs.view(-1)) + + if seq==[] or len(seq)==0: + return [],[] + return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1) + +class AllImgCore(nn.Module): + def __init__(self, opt): + super(AllImgCore, self).__init__() + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.att_feat_size = opt.clip_context_dim + + self.opt = opt + self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type + self.input_dim = self.decide_input_feats_dim() + self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim, + self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm) + assert self.wordRNN_input_feats_type == 'C' + + def decide_input_feats_dim(self): + dim = 0 + if 'E' in self.wordRNN_input_feats_type: + dim += self.opt.event_context_dim + if 'C' in self.wordRNN_input_feats_type: + dim += self.opt.clip_context_dim + return dim + + def forward(self, xt, event, clip, clip_mask, state): + input_feats = (clip * clip_mask.unsqueeze(2)).sum(1) / (clip_mask.sum(1, keepdims=True) + 1e-5) + output, state = self.rnn(torch.cat([xt, input_feats], 1).unsqueeze(0), state) + return output.squeeze(0), state + + +class LightCaptioner(Captioner): + def __init__(self, opt): + super(LightCaptioner, self).__init__(opt) + self.core = AllImgCore(opt) diff --git a/anet_clip/backup/pdvc/CaptioningHead/LSTM_DSA.py b/anet_clip/backup/pdvc/CaptioningHead/LSTM_DSA.py new file mode 100644 index 0000000000000000000000000000000000000000..918fb0ccf89416929b4cee8c1deadd7c99d586ae --- 
/dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/LSTM_DSA.py @@ -0,0 +1,289 @@ +# This file contains ShowAttendTell and AllImg model + +# ShowAttendTell(Soft attention) is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention +# https://arxiv.org/abs/1502.03044 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import * + +from pdvc.ops.modules import MSDeformAttnCap + +class Captioner(nn.Module): + def __init__(self, opt): + super(Captioner, self).__init__() + self.opt = opt + + self.vocab_size = opt.vocab_size + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.max_caption_len = opt.max_caption_len + + self.ss_prob = 0.0 # Schedule sampling probability + self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size) + + self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1) + self.dropout = nn.Dropout(self.drop_prob_lm) + + self.init_weights() + + def init_weights(self): + initrange = 0.1 + self.embed.weight.data.uniform_(-initrange, initrange) + self.logit.bias.data.fill_(0) + self.logit.weight.data.uniform_(-initrange, initrange) + + def init_hidden(self, batch_size): + weight = next(self.parameters()).data + return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(), + weight.new(self.num_layers, batch_size, self.rnn_size).zero_()) # (h0, c0) + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + max_len = input.shape[1] + output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output + + def build_prob(self, input, target, mask): + ''' + Calculate the sentence-level predicted prob for each GT sentence of each query + input: [num_sentence, max_length, num_words_voc] + ''' + # breakpoint() + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) # [num_sentence, max_length, num_words_voc] + max_len = input.shape[1] + # output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + return output + + def forward(self,hs, reference, others, cap_tensor): + seq = cap_tensor + vid_num, query_num, _ = hs.shape + assert vid_num == 1 + + reference_points = reference + input_flatten = others['memory'] + input_spatial_shapes = others['spatial_shapes'] + input_level_start_index = others['level_start_index'] + input_padding_mask = others['mask_flatten'] + if reference_points.shape[-1] == 2: + reference_points = reference_points[:, :, None] \ + * torch.stack([others['valid_ratios']]*2, -1)[:, None] + elif reference_points.shape[-1] == 1: + reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None, :, None] + + query = hs + batch_size = query.shape[1] + state = self.init_hidden(batch_size) + outputs = [] + raw_probs = [] + seq = seq.long() + + n_levels = self.core.n_levels + if n_levels < self.core.opt.num_feature_levels: + input_spatial_shapes = input_spatial_shapes[:n_levels] + input_level_start_index = input_level_start_index[:n_levels] + total_input_len = torch.prod(input_spatial_shapes, dim=1).sum() + input_flatten = input_flatten[:, :total_input_len] + input_padding_mask 
= input_padding_mask[:, :total_input_len] + reference_points = reference_points[:, :, :n_levels] + pass + + for i in range(seq.size(1) - 1): + if self.training and i >= 1 and self.ss_prob > 0.0: # otherwiste no need to sample + sample_prob = hs.new_zeros(batch_size).uniform_(0, 1) + sample_mask = sample_prob < self.ss_prob + if sample_mask.sum() == 0: + it = seq[:, i].clone() + else: + sample_ind = sample_mask.nonzero().view(-1) + it = seq[:, i].data.clone() + prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1) + it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind)) + it = Variable(it, requires_grad=False) + else: + it = seq[:, i].clone() + # break if all the sequences end + if i >= 1 and seq[:, i].data.sum() == 0: + break + + output, state, raw_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask) + outputs.append(output) + raw_probs.append(raw_prob) + + if self.opt.refine_pseudo_box and self.training: + return torch.cat([_.unsqueeze(1) for _ in outputs], 1), torch.cat([_.unsqueeze(1) for _ in raw_probs], 1) + + return torch.cat([_.unsqueeze(1) for _ in outputs], 1) + + + def get_logprobs_state(self, it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask): + xt = self.embed(it) + output, state = self.core(xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask) + logprobs = F.log_softmax(self.logit(self.dropout(output)), dim=1) + softmax_probs = F.softmax(self.logit(self.dropout(output)), dim=1) + raw_probs = self.logit(self.dropout(output)) + # raw_probs: [max_num_word, vocab_size+1] + return logprobs, state, raw_probs + + def sample(self,hs, reference, others, opt={}): + + vid_num, query_num, _ = hs.shape + assert vid_num == 1 + batch_size = vid_num * query_num + sample_max = opt.get('sample_max', 1) + beam_size = opt.get('beam_size', 1) + temperature = opt.get('temperature', 1.0) + + reference_points = reference + input_flatten = others['memory'] + input_spatial_shapes = others['spatial_shapes'] + input_level_start_index = others['level_start_index'] + input_padding_mask = others['mask_flatten'] + if reference_points.shape[-1] == 2: + reference_points = reference_points[:, :, None] \ + * torch.stack([others['valid_ratios']]*2, -1)[:, None] + elif reference_points.shape[-1] == 1: + reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None,:, None] + query = hs + + n_levels = self.core.n_levels + if n_levels < self.core.opt.num_feature_levels: + input_spatial_shapes = input_spatial_shapes[:n_levels] + input_level_start_index = input_level_start_index[:n_levels] + total_input_len = torch.prod(input_spatial_shapes, dim=1).sum() + input_flatten = input_flatten[:, :total_input_len] + input_padding_mask = input_padding_mask[:, :total_input_len] + reference_points = reference_points[:, :, :n_levels] + pass + + state = self.init_hidden(batch_size) + + seq = [] + seqLogprobs = [] + #breakpoint() + + for t in range(self.max_caption_len + 1): + if t == 0: # input + it = hs.data.new(batch_size).long().zero_() + elif sample_max: + sampleLogprobs, it = torch.max(logprobs.data, 1) + it = it.view(-1).long() + else: + if temperature == 1.0: + prob_prev = torch.exp(logprobs.data) # fetch prev distribution: shape Nx(M+1) + else: + # scale logprobs by temperature + prob_prev = torch.exp(torch.div(logprobs.data, 
temperature)) + it = torch.multinomial(prob_prev, 1) + sampleLogprobs = logprobs.gather(1, it) # gather the logprobs at sampled positions + it = it.view(-1).long() # and flatten indices for downstream processing + + logprobs, state, softmax_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask) + + if t >= 1: + # stop when all finished + if t == 1: + unfinished = it > 0 + else: + unfinished = unfinished & (it > 0) + if unfinished.sum() == 0: + break + it = it * unfinished.type_as(it) + seq.append(it) #seq[t] the input of t+2 time step + seqLogprobs.append(sampleLogprobs.view(-1)) + + if seq==[] or len(seq)==0: + return [],[] + return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1) + + +class ShowAttendTellCore(nn.Module): + + def __init__(self, opt): + super(ShowAttendTellCore, self).__init__() + self.input_encoding_size = opt.input_encoding_size + + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + #self.fc_feat_size = opt.fc_feat_size + self.att_feat_size = int(opt.clip_context_dim / opt.cap_nheads) + self.att_hid_size = opt.att_hid_size + + self.opt = opt + self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type + self.input_dim = opt.hidden_dim * 2 + + self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim , + self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm) + self.att_drop = nn.Dropout(0.5) + + d_model = opt.hidden_dim + self.n_levels = opt.cap_num_feature_levels + self.n_heads = opt.cap_nheads + self.n_points = opt.cap_dec_n_points + + self.deformable_att = MSDeformAttnCap(d_model, self.n_levels, self.n_heads, self.n_points) + + if self.att_hid_size > 0: + self.ctx2att = nn.Linear(self.att_feat_size, self.att_hid_size) + self.h2att = nn.Linear(self.rnn_size, self.att_hid_size) + self.alpha_net = nn.Linear(self.att_hid_size, 1) + + def get_input_feats(self, event, att_clip): + input_feats = [] + if 'E' in self.wordRNN_input_feats_type: + input_feats.append(event) + if 'C' in self.wordRNN_input_feats_type: + input_feats.append(att_clip) + input_feats = torch.cat(input_feats,1) + return input_feats + + def forward(self,xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask): + + joint_query = torch.cat((state[0][-1].unsqueeze(0), query), 2) + # (N_, N_q, C) + + N_, Lq_, L_, _ = reference_points.shape + + # (N_ * M_, D_, Lq_, L_* P_) + clip = self.deformable_att(joint_query, reference_points, input_flatten, input_spatial_shapes, + input_level_start_index, input_padding_mask) + clip = clip.reshape(N_, self.n_heads, -1, Lq_, self.n_levels * self.n_points).permute(0, 3, 1, 4, 2) + clip = clip.reshape(N_ * Lq_, self.n_heads, self.n_levels * self.n_points, self.att_feat_size) + att_size = self.n_levels * self.n_points + + att = self.ctx2att(clip) # (batch * att_size) * att_hid_size + att = att.view(-1, self.n_heads, att_size, self.att_hid_size) # batch * att_size * att_hid_size + att_h = self.h2att(state[0][-1]) # batch * att_hid_size + att_h = att_h.unsqueeze(1).unsqueeze(1).expand_as(att) # batch * att_size * att_hid_size + dot = att + att_h # batch * att_size * att_hid_size + dot = torch.tanh(dot) # batch * att_size * att_hid_size + dot = dot.view(-1, self.att_hid_size) # (batch * att_size) * att_hid_size + dot = self.alpha_net(dot) # (batch * att_size) * 1 + dot = dot.view(-1, att_size) # batch * 
att_size + + weight = F.softmax(dot, dim=1) + att_feats_ = clip.reshape(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size + att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size + att_res = att_res.reshape(N_ * Lq_, self.n_heads, self.att_feat_size).flatten(1) + input_feats = torch.cat((att_res.unsqueeze(0), query), 2) + # print(xt.shape, input_feats.shape, query.shape, reference_points.shape) + output, state = self.rnn(torch.cat([xt.unsqueeze(0), input_feats], 2), state) + + return output.squeeze(0), state + + +class LSTMDSACaptioner(Captioner): + def __init__(self, opt): + super(LSTMDSACaptioner, self).__init__(opt) + self.core = ShowAttendTellCore(opt) + diff --git a/anet_clip/backup/pdvc/CaptioningHead/Puppet.py b/anet_clip/backup/pdvc/CaptioningHead/Puppet.py new file mode 100644 index 0000000000000000000000000000000000000000..3051b3d3de863fefc196e08740e7d6d05474adfd --- /dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/Puppet.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn + + +class PuppetCaptionModel(nn.Module): + def __init__(self, opt): + super(PuppetCaptionModel, self).__init__() + self.vocab_size = opt.vocab_size + self.opt = opt + self.puppet_layer= nn.Linear(1,1) + + def forward(self, event, clip, clip_mask, seq): + N, L = seq.shape + output = torch.zeros((N, L-1, self.vocab_size + 1), device=seq.device) + return output + + def sample(self, event, clip, clip_mask, opt={}): + N, _, C = clip.shape + output = torch.zeros((N, 3), device=clip.device) + prob = torch.zeros((N, 3), device=clip.device) + return output, prob + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + output = - (one_hot * input * mask[..., None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output \ No newline at end of file diff --git a/anet_clip/backup/pdvc/CaptioningHead/__init__.py b/anet_clip/backup/pdvc/CaptioningHead/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55abd1cc8681971b0e498d5db23771053240029f --- /dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/__init__.py @@ -0,0 +1,22 @@ +from .LSTM import LightCaptioner +from .Puppet import PuppetCaptionModel +from .LSTM_DSA import LSTMDSACaptioner + +def build_captioner(opt): + if opt.caption_decoder_type == 'none': + caption_embed = PuppetCaptionModel(opt) + + elif opt.caption_decoder_type == 'light': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LightCaptioner(opt) + + elif opt.caption_decoder_type == 'standard': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LSTMDSACaptioner(opt) + + else: + raise ValueError('caption decoder type is invalid') + return caption_embed + diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96e1403d966894f3897772ec3341693c9e1e2097 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7505e3befe8da0cfc2e2cf4ad989639a7aad658 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc differ diff --git 
a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac28b0fdbaca42bce04d24e8200908e43ca3849d Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82aceccc9d18b389c1de136320f99a9d3948bc21 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..204ecd5a71e01bd0a22222a738ac51abf7b3af9a Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86f06e3b6d2a72ca205a646c86a1e9309be235c6 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7506f43c89c0c6345ffd3c53b53cd87d5c394cbc Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e0d650e0f33bbf2aa9248e89a8ac9ec8a76397b Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/UniVL.py b/anet_clip/backup/pdvc/UniVL.py new file mode 100644 index 0000000000000000000000000000000000000000..c5a8bcf7f019968d8751bbbab0537295c77ebfdd --- /dev/null +++ b/anet_clip/backup/pdvc/UniVL.py @@ -0,0 +1,238 @@ + +import os +import random +import numpy as np +from pathlib import Path +from pdvc.modules.modeling import UniVL +from pdvc.modules.tokenization import BertTokenizer +from transformers import AutoTokenizer, BertForPreTraining +import torch +import argparse + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +class UniVL_args(object): + def __init__(self) -> None: + self.do_pretrain = False + self.do_train = False + self.do_eval = True + self.train_csv = 'data/youcookii_singlef_train.csv' + self.val_csv = 'data/youcookii_singlef_val.csv' + self.data_path = 'data/youcookii_caption.pickle' + self.features_path = 'data/youcookii_videos_feature.pickle' + self.num_thread_reader = 1 + self.lr = 0.0001 + self.epochs = 20 + self.batch_size = 256 + self.batch_size_val = 3500 + self.lr_decay = 0.9 + self.n_display = 100 + self.video_dim = 1024 + self.seed = 42 + self.max_words = 48 + self.max_frames = 100 + self.feature_framerate = 1 + self.margin = 0.1 + 
self.hard_negative_rate = 0.5 + self.negative_weighting = 1 + self.n_pair = 1 + self.output_dir = None + self.bert_model = "bert-base-uncased" + self.visual_model = "visual-base" + self.cross_model = "cross-base" + self.decoder_model = "decoder-base" + self.init_model = None + self.do_lower_case = True + self.warmup_proportion = 0.1 + self.gradient_accumulation_steps = 1 + self.n_gpu = 1 + self.cache_dir = "" + self.fp16 = False + self.fp16_opt_level = 'O1' + self.task_type = "retrieval" + self.datatype = "youcook" + self.world_size = 0 + self.local_rank = 0 + self.coef_lr = 0.1 + self.use_mil = False + self.sampled_use_mil = False + self.text_num_hidden_layers = 12 + self.visual_num_hidden_layers = 6 + self.cross_num_hidden_layers = 2 + self.decoder_num_hidden_layers = 3 + self.train_sim_after_cross = False + self.expand_msrvtt_sentences = False + self.batch_size = int(self.batch_size / self.gradient_accumulation_steps) + + def __repr__(self) -> str: + return str(self.__dict__) + + +def set_seed_logger(args): + # Predefine all random seeds for reproducible runs. + random.seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) # safe no-op when fewer GPUs are present + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + return args + +def load_pretrained_UniVL(return_visual_encoder=False): + + args = UniVL_args() + args = set_seed_logger(args) + device, n_gpu = 'cuda', 1 + + init_model = '/cpfs01/user/liuhuabin/PDVC/pdvc/modules/univl.pretrained.bin' + model_state_dict = torch.load(init_model, map_location='cpu') + + # Prepare model + cache_dir = os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + if return_visual_encoder: + return model.bert, model.visual, model.normalize_video + else: + return model.bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + +if __name__ == '__main__': + # Smoke test for the visual branch. load_pretrained_UniVL builds its own + # config and loads the checkpoint from a fixed path, so it only needs the + # return_visual_encoder flag. + device, n_gpu = 'cuda', 1 + model_bert, model_visual, video_normalizer = load_pretrained_UniVL(return_visual_encoder=True)
+ inputs = torch.rand(2, 215, 1024, device=device) + video_mask = torch.ones(2, 215, device=device) + inputs = video_normalizer(inputs) + visual_embed = model_visual(inputs, video_mask, output_all_encoded_layers=True)[0][-1] + + breakpoint() # drop into the debugger to inspect visual_embed \ No newline at end of file diff --git a/anet_clip/backup/pdvc/__init__.py b/anet_clip/backup/pdvc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..194ecd26a483cef3e67c0e5cd971d4f7784aac67 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a406cf3565bfcd54eddc5d19fbeae7bffd2d629 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/__init__.cpython-39.pyc b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4a911fc83c9364bfc6b98dd5d3d5a4ed14f5e3f Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9cafe04379877ab0c87872ae9835aa9bdf4532a4 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6961cba44a3fa93be1463250d574c8d91411714f Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/criterion.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d08274f898128d993db3370b9307fabf56c98f6 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/criterion.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f48fdb961f47546c71e60e995699a206b62a4f6a Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d45de6e0f900d019a24e0f339e62874f2038557e Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/matcher.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6efd23cdeac69c752a715a184606139f2aded19b Binary files /dev/null and
b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/matcher.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f80042e195d3ecda40db7fe17e8b2b6b8991a376 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/pdvc.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/pdvc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7bef286e186c9f27de1ea48197eee0fae6a7d6f Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/pdvc.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c50c9f41bc67334949478d72b69f998d849c9f37 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2b9fbde23c0b61d1377c3e8a2c9af095131c45d Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/util.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e503a7b7440cff82242de19b9d909ba99e5f803 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/util.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/util.cpython-39.pyc b/anet_clip/backup/pdvc/__pycache__/util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df84303c83b25082e579d99e0bdbc7c05bf182ef Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/util.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f2bec65e4730af469226f8efdb168b47da926ef Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff5a503c7efa463a12801a2f62599ed146e5ca93 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/base_encoder.py b/anet_clip/backup/pdvc/base_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb150a62dbb709589ec5271fe1b11ec16adf8f8 --- /dev/null +++ b/anet_clip/backup/pdvc/base_encoder.py @@ -0,0 +1,86 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Base Encoder to create multi-level conv features and positional embedding. +""" + +import torch +import torch.nn.functional as F +from torch import nn +from misc.detr_utils.misc import NestedTensor +from .position_encoding import PositionEmbeddingSine + + +class BaseEncoder(nn.Module): + def __init__(self, num_feature_levels, vf_dim, hidden_dim): + super(BaseEncoder, self).__init__() + self.pos_embed = PositionEmbeddingSine(hidden_dim//2, normalize=True) + self.num_feature_levels = num_feature_levels + self.hidden_dim = hidden_dim + + if num_feature_levels > 1: + input_proj_list = [] + in_channels = vf_dim + input_proj_list.append(nn.Sequential( + nn.Conv1d(in_channels, hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )) + for _ in range(num_feature_levels - 1): + input_proj_list.append(nn.Sequential( + nn.Conv1d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1), + nn.GroupNorm(32, hidden_dim), + )) + in_channels = hidden_dim + self.input_proj = nn.ModuleList(input_proj_list) + else: + # Features are 1-D sequences of shape (N, C, L), so the single-level + # projection must also be a Conv1d, matching the multi-level branch. + self.input_proj = nn.ModuleList([ + nn.Sequential( + nn.Conv1d(vf_dim, hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )]) + + for proj in self.input_proj: + nn.init.xavier_uniform_(proj[0].weight, gain=1) + nn.init.constant_(proj[0].bias, 0) + + def forward(self, vf, mask, duration): + # vf: (N, L, C), mask: (N, L), duration: (N) + assert mask is not None + vf = vf.transpose(1, 2) # (N, L, C) --> (N, C, L) + vf_nt = NestedTensor(vf, mask, duration) + pos0 = self.pos_embed(vf_nt) + + srcs = [] + masks = [] + poses = [] + + src0, mask0 = vf_nt.decompose() + srcs.append(self.input_proj[0](src0)) + masks.append(mask0) + poses.append(pos0) + + for l in range(1, self.num_feature_levels): + if l == 1: + src = self.input_proj[l](vf_nt.tensors) + else: + src = self.input_proj[l](srcs[-1]) + m = vf_nt.mask + mask = F.interpolate(m[None].float(), size=src.shape[-1:]).to(torch.bool)[0] + pos_l = self.pos_embed(NestedTensor(src, mask, duration)).to(src.dtype) + srcs.append(src) + masks.append(mask) + poses.append(pos_l) + return srcs, masks, poses + +def build_base_encoder(args): + base_encoder = BaseEncoder(args.num_feature_levels, args.feature_dim, args.hidden_dim) + return base_encoder diff --git a/anet_clip/backup/pdvc/criterion.py b/anet_clip/backup/pdvc/criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..d47eb41a6711be9904ad6c55d502572261ff73c9 --- /dev/null +++ b/anet_clip/backup/pdvc/criterion.py @@ -0,0 +1,726 @@ +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ +import copy +import torch +import torch.nn.functional as F +from torch import nn + +from misc.detr_utils import box_ops +from misc.detr_utils.misc import (accuracy, get_world_size, + is_dist_avail_and_initialized) + +class SetCriterion(nn.Module): + """ This class computes the loss for DETR.
+ The process happens in two steps: + 1) we compute hungarian assignment between ground truth boxes and the outputs of the model + 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) + """ + def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + losses: list of all the losses to be applied. See get_loss for list of available losses. + focal_alpha: alpha in Focal Loss + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.opt = opt + self.pseudo_box_aug = opt.pseudo_box_aug + self.refine_pseudo_box = opt.refine_pseudo_box + if ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + counter_class_rate =[0.0, 0.012703673018503175, 0.04915769124551229, 0.06489919911626622, 0.0740127036730185, 0.07346037006351837, 0.08064070698702017, + 0.07069870201601768, 0.07870753935376967, 0.07097486882076774, 0.06766086716376692, 0.0579950289975145, 0.05247169290251312, 0.03783485225075946, + 0.03534935100800884, 0.03203534935100801, 0.026788180060756697, 0.02236951118475559, 0.01988400994200497, 0.016570008285004142, 0.013256006628003313, + 0.00856117094725214, 0.006904170118751726, 0.005523336095001381, 0.004694835680751174, 0.0038663352665009665, 0.0027616680475006906, 0.0027616680475006906, + 0.0016570008285004142, 0.0016570008285004142, 0.0005523336095001381, 0.0008285004142502071, 0.0, 0.00027616680475006904, 0.0, 0.0, 0.00027616680475006904, + 0.0011046672190002762, 0.0, 0.0005523336095001381, 0.0, 0.0, 0.0005523336095001381] + else: + counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01, + 1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02, + 1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03, + 2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04, + 4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04, + 2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05] + self.counter_class_rate = torch.tensor(counter_class_rate) + + def loss_labels(self, outputs, targets, indices, num_boxes, log=True): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + indices, many2one_indices = indices + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1], + dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device) + target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1) + + target_classes_onehot = target_classes_onehot[:,:,:-1] + loss_ce = sigmoid_focal_loss(src_logits, 
target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1] + losses = {'loss_ce': loss_ce} + pred_count = outputs['pred_count'] + max_length = pred_count.shape[1] - 1 + counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets] + counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long) + counter_target_onehot = torch.zeros_like(pred_count) + counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1) + weight = self.counter_class_rate[:max_length + 1].to(src_logits.device) + + counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight) + losses['loss_counter'] = counter_loss + + return losses + + @torch.no_grad() + def loss_cardinality(self, outputs, targets, indices, num_boxes): + """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes + This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients + """ + pred_logits = outputs['pred_logits'] + device = pred_logits.device + tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) + # Count the number of predictions that are NOT "no-object" (which is the last class) + card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) + card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) + losses = {'cardinality_error': card_err} + return losses + + def loss_boxes(self, outputs, targets, indices, num_boxes): + """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss + targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 2] + The target boxes are expected in format (center, length), normalized by the image size. 
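+ For example, an event spanning seconds 10-20 of a 100 s video is encoded as (center, length) = (0.15, 0.10).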
+ """ + indices, many2one_indices = indices + N = len(indices[-1][0]) + assert 'pred_boxes' in outputs + idx, idx2 = self._get_src_permutation_idx2(indices) + src_boxes = outputs['pred_boxes'][idx] + if self.opt.use_pseudo_box and self.training: + # print('use pseudo box') + target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0) + else: + # print('use gt box') + target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0) + loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') + + losses = {} + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou( + box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(target_boxes))) + losses['loss_giou'] = loss_giou.sum() / num_boxes + # print(src_boxes) + self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1) + sizes = [len(v[0]) for v in indices] + if sizes == [1]: + losses['loss_self_iou'] = self_iou + return losses + self_iou_split = 0 + for i, c in enumerate(self_iou.split(sizes, -1)): + cc = c.split(sizes, -2)[i] + self_iou_split += cc.sum() / (0.5 * (sizes[i]) * (sizes[i]-1)) + has_nan = False if torch.all(~torch.isnan(self_iou_split)) else True + has_inf = False if torch.all(torch.isfinite(self_iou_split)) else True + if has_nan or has_inf: + breakpoint() + losses['loss_self_iou'] = self_iou_split + + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_src_permutation_idx2(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + src_idx2 = torch.cat([src for (_, src) in indices]) + return (batch_idx, src_idx), src_idx2 + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + + + def get_jittered_box(self, box, box_jitter, box_aug_num=5, mode='random'): + # breakpoint() + box = box.unsqueeze(0) # (1,2) + if mode == 'random': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) + iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + min_keep_cnt = (box_aug_num-1) if (box_aug_num-1) < keep_idx.numel() else keep_idx.numel() + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]] + elif mode == 'random_new': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) 
+ iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + min_keep_cnt = (box_aug_num-1) if (box_aug_num-1) < keep_idx.numel() else keep_idx.numel() + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]] + elif mode == 'uniform': + ratio_c = box_jitter + ratio_d = 0.048 / 2 + scale_c = torch.tensor([-ratio_c, -ratio_c/2, -ratio_c/4, ratio_c/4, ratio_c/2, ratio_c]) + scale_d = torch.tensor([-ratio_d, -ratio_d/2, ratio_d/2, ratio_d]) + scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device) + scale_box = box + scale + scale_box = scale_box.clamp(min=0., max=1.) + iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + unkeep_idx = torch.where(iou.reshape(-1) <= 0.1)[0] + if keep_idx.numel() < (box_aug_num-1): + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:keep_idx.numel()] = scale_box[keep_idx] + random_indices = torch.randperm(unkeep_idx.size(0))[:(box_aug_num-1-keep_idx.numel())] + box_repeat[keep_idx.numel():(box_aug_num-1)] = scale_box[unkeep_idx[random_indices]] + else: + box_repeat = box.repeat(box_aug_num, 1) + random_indices = torch.randperm(keep_idx.numel())[:(box_aug_num-1)] + box_repeat[:box_aug_num-1] = scale_box[keep_idx[random_indices]] + elif mode == 'uniform_old': + # Augment using pre-defined ratios + ratio_c = box_jitter + ratio_d = box_jitter + scale_c = torch.linspace(1-ratio_c, 1+ratio_c, 4) + scale_d = torch.linspace(1-ratio_d, 1+ratio_d, 2) + scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device) # 8 augmented boxes in total (4 center scales x 2 duration scales) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.)
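+ # The IoU below is computed for reference only; this legacy branch keeps a + # random subset of the scaled variants regardless of overlap.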
+ iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + # keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + box_repeat = box.repeat(box_aug_num, 1) + random_indices = torch.randperm(scale_box.size(0))[:(box_aug_num-1)] + box_repeat[:(box_aug_num-1)] = scale_box[random_indices] + elif mode == 'random_range': + def batch_randomize_boxes(boxes, max_vary_range, num_samples=1): + # Get the centers and widths from the input boxes + centers = boxes[:, 0] + widths = boxes[:, 1] + # Generate random values for the left and right boundaries for each box + left_boundaries = centers - (widths / 2) - torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range) + right_boundaries = centers + (widths / 2) + torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range) + + # Ensure that the boundaries stay within the [0, 1] range + left_boundaries = left_boundaries.clamp(0, 1) + right_boundaries = right_boundaries.clamp(0, 1) + + # Calculate the new centers and widths + new_centers = (left_boundaries + right_boundaries) / 2 + new_widths = right_boundaries - left_boundaries + + # Ensure that the widths are non-negative and revert to the original boxes if needed + is_negative = new_widths <= 0 + new_widths = torch.where(is_negative, widths, new_widths) + new_centers = torch.where(is_negative, centers, new_centers) + + # Create and return the new boxes tensor + new_boxes = torch.stack((new_centers, new_widths), dim=2) + return new_boxes.squeeze(0) + box_repeat = batch_randomize_boxes(box, box_jitter, box_aug_num) + if torch.isnan(box_repeat).any(): + breakpoint() + elif mode == 'augment_width': # original width is 0.5 \sigma range + import random + def augment_boxes_with_scale(boxes, scale, num_augments): + augmented_boxes = [] + for _ in range(num_augments): + center, width = boxes[0] + # Generate a random scale factor with a roughly uniform distribution + random_scale = scale ** random.uniform(-1, 1) + new_width = width * random_scale + if center + new_width / 2 > 1 or center - new_width / 2 < 0: + new_width = width + augmented_boxes.append([center, new_width]) + augmented_boxes = torch.tensor(augmented_boxes, device=boxes.device) + return augmented_boxes + box_repeat = augment_boxes_with_scale(box, box_jitter, box_aug_num) + + else: + raise NotImplementedError('Unsupported box augmentation mode: {}'.format(mode)) + return box_repeat + + def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'cardinality': self.loss_cardinality, + 'boxes': self.loss_boxes, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs) + + def forward(self, outputs, targets, others=None, aug_num=None, aug_ratio=None): + """ This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + targets: list of dicts, such that len(targets) == batch_size.
+ The expected keys in each dict depend on the losses applied; see each loss's doc + """ + outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs' and k != 'enc_outputs'} + if self.training and self.pseudo_box_aug: + targets_cp = copy.deepcopy(targets) + assert self.opt.use_pseudo_box + for i in range(len(targets_cp)): + boxes_aug = [] + for j in range(len(targets_cp[i]['labels'])): + pseudo_box = targets_cp[i]['boxes_pseudo'][j] + pseudo_box_aug = self.get_jittered_box(pseudo_box, aug_ratio, aug_num, self.opt.pseudo_box_aug_mode) + boxes_aug.append(pseudo_box_aug) + targets_cp[i]['boxes_pseudo'] = torch.cat(boxes_aug, dim=0) + targets_cp[i]['labels'] = targets_cp[i]['labels'].unsqueeze(dim=1).repeat(1, aug_num).reshape(-1,) + targets[i]['box_pseudo_aug'] = torch.cat(boxes_aug, dim=0) + # Retrieve the matching between the outputs of the last layer and the targets + last_indices = self.matcher(outputs_without_aux, targets_cp) + else: + targets_cp = targets + last_indices = self.matcher(outputs_without_aux, targets) + outputs['matched_indices'] = last_indices + + num_boxes = sum(len(t["labels"]) for t in targets_cp) + num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + + # Compute all the requested losses + losses = {} + for loss in self.losses: + kwargs = {} + losses.update(self.get_loss(loss, outputs, targets_cp, last_indices, num_boxes, **kwargs)) + + # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. + if 'aux_outputs' in outputs: + aux_indices = [] + for i, aux_outputs in enumerate(outputs['aux_outputs']): + indices = self.matcher(aux_outputs, targets_cp) + aux_indices.append(indices) + for loss in self.losses: + if loss == 'masks': + # Intermediate masks losses are too costly to compute, we ignore them. + continue + kwargs = {} + if loss == 'labels': + # Logging is enabled only for the last layer + kwargs['log'] = False + l_dict = self.get_loss(loss, aux_outputs, targets_cp, indices, num_boxes, **kwargs) + l_dict = {k + f'_{i}': v for k, v in l_dict.items()} + losses.update(l_dict) + + return losses, last_indices, aux_indices + return losses, last_indices + +class AlignCriterion(nn.Module): + """ This class computes the loss via caption-to-query alignment. + The process happens in two steps: + 1) we compute DTW assignment between ground truth captions and the output object queries + 2) we supervise each pair of matched ground-truth / prediction (supervise class) + """ + def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + losses: list of all the losses to be applied. See get_loss for list of available losses.
+ focal_alpha: alpha in Focal Loss + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.opt = opt + counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01, + 1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02, + 1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03, + 2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04, + 4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04, + 2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05] + self.counter_class_rate = torch.tensor(counter_class_rate) + + def loss_labels(self, outputs, targets, indices, num_boxes, log=True): + """Classification loss (NLL) + Compute the classification loss and counter loss + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + indices, many2one_indices = indices + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1], + dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device) + target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1) + + target_classes_onehot = target_classes_onehot[:,:,:-1] + loss_ce = sigmoid_focal_loss(src_logits, target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1] + losses = {'loss_ce': loss_ce} + + pred_count = outputs['pred_count'] + max_length = pred_count.shape[1] - 1 + counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets] + counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long) + counter_target_onehot = torch.zeros_like(pred_count) + counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1) + weight = self.counter_class_rate[:max_length + 1].to(src_logits.device) + # breakpoint() + counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight) + losses['loss_counter'] = counter_loss + + return losses + + def loss_boxes(self, outputs, targets, indices, num_boxes): + # Compute temporal IOU loss among given predicted N temporal boundaries, which encourages the temporal boundaries to be more diverse and no overlap + # outputs: (bsz, num_query, 2) + # breakpoint() + # breakpoint() + indices, many2one_indices = indices + idx, idx2 = self._get_src_permutation_idx2(indices) + src_boxes = outputs['pred_boxes'][idx] # num_boxes, 2 + avg_duration = torch.mean(src_boxes[:, 1]) + center_point = src_boxes[:,0] + N = len(indices[-1][0]) + + losses = {} + + if self.opt.use_pseudo_box and self.training: + # If generate peseudo ground truth boxes from alignment, use the alignment boxes as the target boxes + target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0) + loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + loss_giou = 1 - 
torch.diag(box_ops.generalized_box_iou( + box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(target_boxes))) + losses['loss_giou'] = loss_giou.sum() / num_boxes + + if not self.opt.use_pseudo_box: + ## Sequence ordering loss + rank_margin = 0.01 + pairs = torch.combinations(torch.arange(center_point.size(0)), 2) + rank_dist = center_point[pairs[:, 0]] - center_point[pairs[:, 1]] + # Hinge on each ordered pair so that earlier queries keep earlier center + # points; the margin is applied once, inside the relu. + rank_loss = torch.relu(rank_dist + rank_margin).mean() + + losses['loss_ref_rank'] = rank_loss + + ## Self IOU loss + prior_duration = 0.06 + self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1) + sizes = [len(v[0]) for v in indices] + self_iou_split = 0 + for i, c in enumerate(self_iou.split(sizes, -1)): + cc = c.split(sizes, -2)[i] + self_iou_split += cc.sum() / (0.5 * (sizes[i]) * (sizes[i]-1)) + duration_constraint = torch.abs(prior_duration/(avg_duration + 1e-6) - 1) + self_iou_split += duration_constraint + + losses['loss_self_iou'] = self_iou_split + + return losses + + @torch.no_grad() + def loss_cardinality(self, outputs, targets, indices, num_boxes): + """ Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. + This is not really a loss; it is intended for logging purposes only and does not propagate gradients. + """ + pred_logits = outputs['pred_logits'] + device = pred_logits.device + tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) + # Count the number of predictions that are NOT "no-object" (which is the last class) + card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) + card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) + losses = {'cardinality_error': card_err} + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_src_permutation_idx2(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + src_idx2 = torch.cat([src for (_, src) in indices]) + return (batch_idx, src_idx), src_idx2 + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'boxes': self.loss_boxes, + 'cardinality': self.loss_cardinality, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs) + + def forward(self, outputs, targets, others): + """ This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + targets: list of dicts, such that len(targets) == batch_size.
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + text_embed = others['text_embed'] # num_dec_layers, num_sentence, dim + event_embed = others['event_embed'] # num_dec_layers, num_query, dim + dim = event_embed.shape[-1] + + # Retrieve the matching between the outputs of the last layer and the targets + # if self.opt.matcher_type == 'DTW': + # last_indices = self.matcher(text_embed[-1], event_embed[-1].reshape(-1, dim)) + # elif self.opt.matcher_type == 'Sim': + # last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim)) + # else: + # raise NotImplementedError('Align Criterion does not support:{}'.format(self.opt.matcher_type)) + #breakpoint() + last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim)) + outputs['matched_indices'] = last_indices + + num_boxes = sum(len(t["labels"]) for t in targets) + num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + # Compute all the requested losses + losses = {} + for loss in self.losses: + kwargs = {} + losses.update(self.get_loss(loss, outputs, targets, last_indices, num_boxes, **kwargs)) + + # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. + if 'aux_outputs' in outputs: + aux_indices = [] + for i, aux_outputs in enumerate(outputs['aux_outputs']): + indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim)) + aux_indices.append(indices) + for loss in self.losses: + kwargs = {} + if loss == 'labels': + # Logging is enabled only for the last layer + kwargs['log'] = False + l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs) + l_dict = {k + f'_{i}': v for k, v in l_dict.items()} + losses.update(l_dict) + + return losses, last_indices, aux_indices + return losses, last_indices + +class ContrastiveCriterion(nn.Module): + ''' + Contrastive loss between event feature and caption feature + ''' + + def __init__(self, temperature=0.1, enable_cross_video_cl=False, enable_e2t_cl=False, enable_bg_for_cl=False): + super().__init__() + self.temperature = temperature + self.enable_cross_video_cl = enable_cross_video_cl + self.enable_e2t_cl = enable_e2t_cl + self.enable_bg_for_cl = enable_bg_for_cl + + def forward_logits(self, text_embed, event_embed, bg_embed=None): + normalized_text_emb = F.normalize(text_embed, p=2, dim=1) + normalized_event_emb = F.normalize(event_embed, p=2, dim=1) + logits = torch.mm(normalized_text_emb, normalized_event_emb.t()) + if bg_embed is not None: + bg_logits = torch.sum(normalized_event_emb * F.normalize(bg_embed, p=2), dim=1) + logits = torch.cat((logits, bg_logits.unsqueeze(0)), dim=0) + return logits + + + def forward(self, text_embed, event_embed, matching_indices, return_logits=False, bg_embed=None): + + ''' + :param text_embed: [(event_num, contrastive_hidden_size)], len = batch size + total_event_number = sum of event number of each item in current batch + :param event_embed: (bsz, max_event_num, contrastive_hiddent_size), which need to be + expand in this function + :param matching_indices: (bsz, event_num) + ''' + batch_size, max_event_num, _ = event_embed.shape + event_embed, text_embed, gt_labels, gt_event_num = self._preprocess(event_embed, [text_embed], matching_indices) + raw_logits = 
self.forward_logits(text_embed, event_embed) + logits = raw_logits / self.temperature + + if self.enable_cross_video_cl: + t2e_loss = F.cross_entropy(logits, gt_labels) + if self.enable_e2t_cl: + gt_label_matrix = torch.zeros(len(text_embed) + 1, len(event_embed), device=text_embed.device) + gt_label_matrix[torch.arange(len(gt_labels)), gt_labels] = 1 + event_mask = gt_label_matrix.sum(dim=0) == 0 + gt_label_matrix[-1, event_mask] = 1 + e2t_gt_label = gt_label_matrix.max(dim=0)[1] + bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1) + e2t_logits = torch.cat((logits, bg_logits.unsqueeze(0) / self.temperature), dim=0) + if self.enable_bg_for_cl: + e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label) + else: + e2t_loss = F.cross_entropy(e2t_logits.t()[~event_mask], e2t_gt_label[~event_mask]) + loss = 0.5 * (t2e_loss + e2t_loss) + else: + loss = t2e_loss + else: + loss = 0 + base = 0 + for i in range(batch_size): + current_gt_event_num = gt_event_num[i] + current_logits = logits[base: base + current_gt_event_num, i * max_event_num: (i + 1) * max_event_num] + current_gt_labels = gt_labels[base: base + current_gt_event_num] + t2e_loss = F.cross_entropy(current_logits, current_gt_labels) + if self.enable_e2t_cl: + gt_label_matrix = torch.zeros(gt_event_num[i] + 1, max_event_num, device=text_embed.device) + # one-hot rows marking each matched event, as in the cross-video branch + gt_label_matrix[torch.arange(len(current_gt_labels)), current_gt_labels] = 1 + event_mask = gt_label_matrix.sum(dim=0) == 0 + e2t_gt_label = gt_label_matrix.max(dim=0)[1] + bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1) + e2t_logits = torch.cat((current_logits, bg_logits.unsqueeze(0) / self.temperature), dim=0) + if self.enable_bg_for_cl: + e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label) + else: + e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label, ignore_index=len(text_embed), reduction='sum') / (1e-5 + sum(~event_mask)) + loss += 0.5 * (t2e_loss + e2t_loss) + else: + loss += t2e_loss + base += current_gt_event_num + loss = loss / batch_size + if return_logits: + return loss, raw_logits + return loss + + + def _preprocess(self, event_embed, text_embed, matching_indices): + ''' + Flatten event_embed of a batch, get gt label + + :param matching_indices: [(event_num, )] len = bsz + ''' + batch_size, max_event_num, f_dim = event_embed.shape + gt_labels = [] + text_features = [] + gt_event_num = [] + event_features = event_embed.view(-1, f_dim) + for i in range(batch_size): + base = i * max_event_num if self.enable_cross_video_cl else 0 + feat_ids, cap_ids = matching_indices[i] + gt_event_num.append(len(feat_ids)) + text_features.append(text_embed[i][cap_ids]) + gt_labels.append(feat_ids + base) + text_features = torch.cat(text_features, dim=0) + gt_labels = torch.cat(gt_labels, dim=0) + gt_labels = gt_labels.to(event_embed.device) + + return event_features, text_features, gt_labels, gt_event_num + +def cross_entropy_with_gaussian_mask(inputs, targets, opt, weight): + gau_mask = opt.lloss_gau_mask + beta = opt.lloss_beta + + N_, max_seq_len = targets.shape + gaussian_mu = torch.arange(max_seq_len, device=inputs.device).unsqueeze(0).expand(max_seq_len, + max_seq_len).float() + x = gaussian_mu.transpose(0, 1) + gaussian_sigma = 2 + mask_dict = torch.exp(-(x - gaussian_mu) ** 2 / (2 * gaussian_sigma ** 2)) + _, ind = targets.max(dim=1) + mask = mask_dict[ind] + + loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none", weight=1 - weight) + if gau_mask: + coef = targets + ((1
- mask) ** beta) * (1 - targets) + else: + coef = targets + (1 - targets) + loss = loss * coef + loss = loss.mean(1) + return loss.mean() + +def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): + """ + Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + alpha: (optional) Weighting factor in range (0,1) to balance + positive vs negative examples. Default = -1 (no weighting). + gamma: Exponent of the modulating factor (1 - p_t) to + balance easy vs hard examples. + Returns: + Loss tensor + """ + + prob = inputs.sigmoid() + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") # with_logits func calculates sigmoid and CE jointly + p_t = prob * targets + (1 - prob) * (1 - targets) + loss = ce_loss * ((1 - p_t) ** gamma) + + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + loss = alpha_t * loss + + return loss.mean(1).sum() / num_boxes + +def regression_loss(inputs, targets, opt, weight): + inputs = F.relu(inputs) + 2 + max_id = torch.argmax(targets, dim=1) + if opt.regression_loss_type == 'l1': + loss = nn.L1Loss()(inputs[:, 0], max_id.float()) + elif opt.regression_loss_type == 'l2': + loss = nn.MSELoss()(inputs[:, 0], max_id.float()) + return loss \ No newline at end of file diff --git a/anet_clip/backup/pdvc/deformable_transformer.py b/anet_clip/backup/pdvc/deformable_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9b742061b166e0badc41db80f5423b0e46a746 --- /dev/null +++ b/anet_clip/backup/pdvc/deformable_transformer.py @@ -0,0 +1,496 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + +import copy +import math + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn.init import xavier_uniform_, constant_, normal_ + +from misc.detr_utils.misc import inverse_sigmoid +from pdvc.ops.modules import MSDeformAttn + + +class DeformableTransformer(nn.Module): + def __init__(self, d_model=256, nhead=8, + num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=1024, dropout=0.1, + activation="relu", return_intermediate_dec=False, + num_feature_levels=4, dec_n_points=4, enc_n_points=4, use_anchor=False): + super().__init__() + + self.d_model = d_model + self.nhead = nhead + self.use_anchor = use_anchor + + self.no_encoder = (num_encoder_layers == 0) + self.num_feature_levels = num_feature_levels + + encoder_layer = DeformableTransformerEncoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, enc_n_points) + self.encoder = DeformableTransformerEncoder(encoder_layer, num_encoder_layers) + + decoder_layer = DeformableTransformerDecoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, dec_n_points) + self.decoder = DeformableTransformerDecoder(decoder_layer, num_decoder_layers, return_intermediate_dec, d_model, use_anchor) + + self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model)) + + self.pos_trans = nn.Linear(d_model, d_model * 2) + self.pos_trans_norm = nn.LayerNorm(d_model * 2) + self.reference_points = nn.Linear(d_model, 1) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + for m in self.modules(): + if isinstance(m, MSDeformAttn): + m._reset_parameters() + # if not self.use_anchor: + xavier_uniform_(self.reference_points.weight.data, gain=1.0) + constant_(self.reference_points.bias.data, 0.) 
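+ # With Xavier weights and zero bias, the reference-point projection outputs + # values near 0, so initial reference points sit near 0.5 (the sequence + # midpoint) after the sigmoid; the level embeddings below start from a + # standard normal draw.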
+ normal_(self.level_embed) + + + def get_proposal_pos_embed(self, proposals): + num_pos_feats = 256 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + # N, L, 2 + proposals = proposals.sigmoid() * scale + # N, L, 2, 256 + pos = proposals[:, :, :, None] / dim_t + # N, L, 2, 128, 2 + pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()), dim=4).flatten(2) + return pos + + def get_proposal_pos_embed_1d(self, proposals): + num_pos_feats = 512 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + + # N, L + proposals = proposals.sigmoid() * scale + # N, L, 512 + pos = proposals[:, None] / dim_t + + pos = torch.stack((pos[:, 0::2].sin(), pos[:, 1::2].cos()), dim=2).flatten(1) + return pos + + def get_valid_ratio(self, mask): + valid_ratio_L = torch.sum(~mask, 1).float() / mask.shape[1] + return valid_ratio_L + + def prepare_encoder_inputs(self, srcs, masks, pos_embeds): + # prepare input for encoder + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + temporal_shapes = [] + for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)): + """ + lvl: (bs, ) + src: (bs, c, L ) + mask: (bs, L) + pos_embed: (bs, d_m, L) + """ + bs, c, L = src.shape + temporal_shapes.append(L) + src = src.transpose(1, 2) # (bs, L, c) + pos_embed = pos_embed.transpose(1, 2) # #(bs, L, d_m) + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + mask_flatten.append(mask) + src_flatten = torch.cat(src_flatten, 1) # (lvl_num, bs, wh, c) + mask_flatten = torch.cat(mask_flatten, 1) # (lvl_num, bs, wh) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) # (lvl_num, bs, wh, d_m) + temporal_shapes = torch.as_tensor(temporal_shapes, dtype=torch.long, device=src_flatten.device) # (lvl_num, 2) + level_start_index = torch.cat((temporal_shapes.new_zeros((1,)), temporal_shapes.cumsum(0)[ + :-1])) # prod: [w0h0, w0h0+w1h1, w0h0+w1h1+w2h2, ...] 
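+ # e.g. temporal_shapes = [100, 50, 25] gives level_start_index = [0, 100, 150], + # the offset of each level inside the flattened feature sequence.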
+ valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks], + 1) # (bs, lvl_num, 2), where 2 means (h_rate, and w_rate), all values <= 1 + + return src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten + + def forward_encoder(self, src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, + mask_flatten): + # encoder + if self.no_encoder: + memory = src_flatten + else: + memory = self.encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, + mask_flatten) + + return memory + + def prepare_decoder_input_query(self, memory, query_embed): + bs, _, _ = memory.shape + query_embed, tgt = torch.chunk(query_embed, 2, dim=1) + query_embed = query_embed.unsqueeze(0).expand(bs, -1, -1) + tgt = tgt.unsqueeze(0).expand(bs, -1, -1) + reference_points = self.reference_points(query_embed).sigmoid() # (bs, object_query, 1) + init_reference_out = reference_points # (bs, object_query, 1) + return init_reference_out, tgt, reference_points, query_embed + + def prepare_init_anchor_and_query(self, anchor_embed, hidden_dim, random_anchor_init=False, prior_anchor_duration_init=False, prior_duration=0.048): + num_queries = anchor_embed.weight.shape[0] + # query_embed = nn.Embedding(num_queries, hidden_dim) + if random_anchor_init: + anchor_embed.weight.data[:, :1] = torch.linspace(0, 1, num_queries).unsqueeze(1) + anchor_embed.weight.data[:, :1] = inverse_sigmoid(anchor_embed.weight.data[:, :1]) + print('Initilize the anchor center point with uniform distribution') + #self.anchor_embed.weight.data[:, :1].requires_grad = False # DAB-anchor set this to be False + anchor_embed.weight.data[:, :1].requires_grad = True # I set it to be True + # breakpoint() + if prior_anchor_duration_init: + # TODO: add prior anchor duration initialization, the below implementation is not correct + torch.nn.init.constant_(anchor_embed.weight.data[:, 1:], prior_duration) + anchor_embed.weight.data[:, 1:] = inverse_sigmoid(anchor_embed.weight.data[:, 1:]) + anchor_embed.weight.data[:, 1:].requires_grad = True + print('Initilize the anchor duration point with: {}'.format(prior_duration)) + reference_points = anchor_embed.weight.data.detach().clone().sigmoid().unsqueeze(0).expand(1, -1, -1) + topk_coords_unact = inverse_sigmoid(reference_points[0, :, 0]) + query_embed = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed_1d(topk_coords_unact))) # Position embedding receives non-sigmoided coordinates + # breakpoint() + return query_embed + + def prepare_decoder_input_anchor(self, memory, query_anchor): + bs, _, _ = memory.shape + query_embed, anchor = query_anchor + position_embedding, tgt = torch.chunk(query_embed, 2, dim=1) + position_embedding = position_embedding.unsqueeze(0).expand(bs, -1, -1) + tgt = tgt.unsqueeze(0).expand(bs, -1, -1) + reference_points = anchor.sigmoid().unsqueeze(0).expand(bs, -1, -1) # (bs, num_queries, 2) + # tgt = query_embed[..., :self.d_model] + # tgt = tgt.unsqueeze(0).expand(bs, -1, -1) # (bs, num_queries, query_dim) + init_reference_out = reference_points + + # topk_coords_unact = inverse_sigmoid(reference_points) + # position_embeding = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed_1d(topk_coords_unact))) + return init_reference_out, tgt, reference_points, position_embedding + + def prepare_decoder_input_prior(self, proposals, num_queries=100): + ''' + :param proposals: (batch, num_sentence, 2) + ''' + bs,_,_ = proposals.shape + # Uniformly generate normalized 
coordinates according to number of sentences + reference_points_list = [] + for i in range(bs): + # Generate N-1 points from 0~1 for each sentence uniformly + ns = proposals[i].shape[0] # number of sentences + reference_points_c = torch.linspace(0,1, 2*ns+1, dtype=torch.float32, device=proposals.device) + reference_points_c = reference_points_c[1:-1:2] # (num_sentence,) + reference_points_d = torch.Tensor([1.0/ns]).to(proposals.device).repeat(ns) # (num_sentence,) + reference_points = torch.stack([reference_points_c, reference_points_d], -1) # (num_sentence, 2) + # Padding the reference point to the same length + + num_query_per_sentence = num_queries // ns + reference_points = reference_points.repeat(1, num_query_per_sentence).reshape(-1,2) # (num_queries, 2) + if num_queries % ns != 0: # Padding with zeros + num_padding = num_queries - num_query_per_sentence * ns + padding = torch.Tensor([[1.0, 1.0/ns]]).to(proposals.device).repeat(num_padding, 1) + reference_points = torch.cat([reference_points, padding], 0) + reference_points_list.append(reference_points) + reference_points = torch.stack(reference_points_list, 0) # (batch, num_queries, 2) + init_reference_out = reference_points[:,:,:1] + topk_coords_unact = inverse_sigmoid(reference_points) + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) + return init_reference_out, tgt, reference_points[:,:,:1], query_embed + + def prepare_decoder_input_proposal(self, gt_reference_points): + ''' + :param gt_reference_points: (batch, num_sentence, 2) + ''' + #breakpoint() + topk_coords_unact = inverse_sigmoid(gt_reference_points) + reference_points = gt_reference_points + init_reference_out = reference_points + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) # Split to query_embed and position_embed (bs, num_sentence, hidden_dim, 2) + return init_reference_out, tgt, reference_points, query_embed + + def forward_decoder(self, *kargs): + hs, inter_references_out = self.decoder(*kargs) + return hs, inter_references_out + + +class DeformableTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, temporal_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, temporal_shapes, level_start_index, + padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + 
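+ +# A minimal, hypothetical sketch (not part of the model, assumptions noted +# inline) of how DeformableTransformerEncoder.get_reference_points behaves in +# the 1-D case for one sample whose mask is fully valid (valid_ratio == 1): +# each temporal level of length L_ contributes normalized cell centers +# (0.5, 1.5, ..., L_ - 0.5) / L_. +def _demo_reference_points_1d(temporal_shapes=(8, 4)): + points = [] + for L_ in temporal_shapes: + # centers of each temporal cell, normalized to [0, 1] + points.append((torch.arange(L_, dtype=torch.float32) + 0.5) / L_) + return torch.cat(points) # e.g. starts [0.0625, 0.1875, ...] for L_ = 8 +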
+class DeformableTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(temporal_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (L_) in enumerate(temporal_shapes): + ref = torch.linspace(0.5, L_ - 0.5, L_, dtype=torch.float32, device=device) + ref = ref.reshape(-1)[None] / (valid_ratios[:, None, lvl] * L_) + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * valid_ratios[:, None] + reference_points = reference_points[:,:,:,None] + return reference_points + + def forward(self, src, temporal_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(temporal_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, temporal_shapes, level_start_index, padding_mask) + + return output + + +class DeformableTransformerDecoderLayer(nn.Module): + def __init__(self, d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # cross attention + self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # self attention + self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout) + self.dropout2 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout3 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout4 = nn.Dropout(dropout) + self.norm3 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, tgt): + tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout4(tgt2) + tgt = self.norm3(tgt) + return tgt + + def forward(self, tgt, query_pos, reference_points, src, src_temporal_shapes, level_start_index, + src_padding_mask=None, query_mask=None): + # self attention + q = k = self.with_pos_embed(tgt, query_pos) + tgt2 = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), tgt.transpose(0, 1), key_padding_mask=~query_mask)[ + 0].transpose(0, 1) + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + + # cross attention + tgt2 = self.cross_attn(self.with_pos_embed(tgt, query_pos), + reference_points, + src, src_temporal_shapes, level_start_index, src_padding_mask) + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + + # ffn + tgt = self.forward_ffn(tgt) + return tgt + + +class DeformableTransformerDecoder(nn.Module): + def __init__(self, decoder_layer, num_layers, return_intermediate=False, d_model=256, use_anchor=False): + super().__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.return_intermediate = return_intermediate + # hack implementation for iterative bounding box refinement and two-stage Deformable DETR + self.bbox_head = None + self.use_anchor = use_anchor + self.d_model = d_model + # if use_anchor: + # self.anchor_head = MLP(d_model, d_model, d_model, 2) + # self.scale_head = MLP(d_model, d_model, d_model, 2) + + + def forward(self, tgt, reference_points, 
src, src_temporal_shapes, src_level_start_index, src_valid_ratios, + query_pos=None, src_padding_mask=None, query_padding_mask=None, disable_iterative_refine=False): + output = tgt + + intermediate = [] + intermediate_reference_points = [] + bs = tgt.shape[0] + for lid, layer in enumerate(self.layers): + if reference_points.shape[-1] == 2: + reference_points_input = reference_points[:, :, None] \ + * torch.stack([src_valid_ratios, src_valid_ratios], -1)[:, None] + else: + assert reference_points.shape[-1] == 1 + reference_points_input = reference_points[:, :, None] * src_valid_ratios[:, None, :, None] + # if self.use_anchor: + # query_sine_embed = gen_sineembed_for_position(reference_points_input[:,:,0,:], self.d_model) + # raw_query_pos = self.anchor_head(query_sine_embed) # num_query, bs, 256 + # query_scale_embed = self.scale_head(output) if lid != 0 else 1 + # query_pos = query_scale_embed * raw_query_pos + output = layer(output, query_pos, reference_points_input, src, src_temporal_shapes, src_level_start_index, + src_padding_mask, query_padding_mask) + + if self.use_anchor: + assert reference_points.shape[-1] == 2 + + # hack implementation for iterative bounding box refinement + if disable_iterative_refine: + reference_points = reference_points + else: + if (self.bbox_head is not None): + tmp = self.bbox_head[lid](output) + if reference_points.shape[-1] == 2: + new_reference_points = tmp + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + else: + assert reference_points.shape[-1] == 1 + new_reference_points = tmp + new_reference_points[..., :1] = tmp[..., :1] + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + reference_points = new_reference_points.detach() + else: + reference_points = reference_points + + if self.return_intermediate: + intermediate.append(output) + intermediate_reference_points.append(reference_points) + # breakpoint() + + if self.return_intermediate: + return torch.stack(intermediate), torch.stack(intermediate_reference_points) + + return output, reference_points + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") + + +def gen_sineembed_for_position(pos_tensor, d_model): + # n_query, bs, _ = pos_tensor.size() + # sineembed_tensor = torch.zeros(n_query, bs, 256) + hidden_dim = d_model // 2 + scale = 2 * math.pi + dim_t = torch.arange(hidden_dim, dtype=torch.float32, device=pos_tensor.device) + dim_t = 10000 ** (2 * (dim_t // 2) / hidden_dim) + x_embed = pos_tensor[:, :, 0] * scale + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + if pos_tensor.size(-1) == 1: + pos = pos_x + elif pos_tensor.size(-1) == 2: + w_embed = pos_tensor[:, :, 1] * scale + pos_w = w_embed[:, :, None] / dim_t + pos_w = torch.stack((pos_w[:, :, 0::2].sin(), pos_w[:, :, 1::2].cos()), dim=3).flatten(2) + + pos = torch.cat((pos_x, pos_w), dim=2) + else: + raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1))) + return pos + +class MLP(nn.Module): + """ Very simple multi-layer perceptron (also called FFN)""" + + def __init__(self, input_dim, hidden_dim, output_dim, 
num_layers): + super().__init__() + self.num_layers = num_layers + h = [hidden_dim] * (num_layers - 1) + self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + +def build_deforamble_transformer(args): + return DeformableTransformer( + d_model=args.hidden_dim, + nhead=args.nheads, + num_encoder_layers=args.enc_layers, + num_decoder_layers=args.dec_layers, + dim_feedforward=args.transformer_ff_dim, + dropout=args.transformer_dropout_prob, + activation="relu", + return_intermediate_dec=True, + num_feature_levels=args.num_feature_levels, + dec_n_points=args.dec_n_points, + enc_n_points=args.enc_n_points, + use_anchor=args.use_anchor) diff --git a/anet_clip/backup/pdvc/dp/CFSA.py b/anet_clip/backup/pdvc/dp/CFSA.py new file mode 100644 index 0000000000000000000000000000000000000000..135defd0c1a48435405a27e2cc12532d86b5d79a --- /dev/null +++ b/anet_clip/backup/pdvc/dp/CFSA.py @@ -0,0 +1,327 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, repeat +from torch.nn import CrossEntropyLoss +import math + +def compute_cls_loss(pred, labels, use_cosface=False): + if use_cosface: + # CosFace Loss + s, m = 30.0, 0.4 + cos_value = torch.diagonal(pred.transpose(0, 1)[labels]) + numerator = s * (cos_value - m) + excl = torch.cat([torch.cat((pred[i, :y], pred[i, y + 1:])).unsqueeze(0) for i, y in enumerate(labels)], dim=0) + denominator = torch.exp(numerator) + torch.sum(torch.exp(s * excl), dim=1) + L = numerator - torch.log(denominator) + loss = -torch.mean(L) + else: + # Softmax Loss + criterion = CrossEntropyLoss().cuda() + loss = criterion(pred, labels) + + return loss + + +def frame_blank_align_loss(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + blank2 = seq_features2[:, :1] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + K = 2 * step_num + 1 + sparse_seq_features2 = torch.cat((blank2, seq_features2[:, [5, 7, 8, 9, 11, 12, 13, 14], :]), dim=1) + pred = (torch.einsum('bic,bjc->bij', seq_features1, sparse_seq_features2) / math.sqrt(C)).log_softmax(-1) + + D_pre = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_pre[:, 0] = pred[:, 0, 0] + D_pre[:, 1] = pred[:, 0, 1] + + for t in range(1, T): + D_cur = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_cur[:, 0] = D_pre[:, 0] + pred[:, t, 0] + D_cur[:, 1] = torch.logsumexp(torch.stack([D_pre[:, 0], D_pre[:, 1]]), dim=0) + pred[:, t, 1] + + # blank term + blank_pre_ind = torch.arange(1, K, 2)[None, :].repeat(B, 1) + blank_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_pre_ind] + + blank_cur_ind = torch.arange(2, K, 2)[None, :].repeat(B, 1) + blank_cur = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_cur_ind] + + blank_log_prob = torch.logsumexp(torch.stack([blank_pre, blank_cur]), dim=0) + D_cur[:, 2:][:, ::2] = blank_log_prob + pred[:, t, 0][:, None].repeat(1, blank_log_prob.shape[-1]) + + # step term + step_prepre_ind = torch.arange(1, K, 2)[None, :-1].repeat(B, 1) + step_prepre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_prepre_ind] + + step_pre_ind = torch.arange(2, K, 2)[None, :-1].repeat(B, 1) + step_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_pre_ind] + + step_cur_ind = torch.arange(3, K, 2)[None, :].repeat(B, 1) + step_cur 
= D_pre[torch.arange(B, device=device).unsqueeze(-1), step_cur_ind] + + step_log_prob = torch.logsumexp(torch.stack([step_prepre, step_pre, step_cur]), dim=0) + D_cur[:, 2:][:, 1::2] = step_log_prob + pred[:, t, 2:] + D_pre = D_cur + + fsa_distance = -torch.logsumexp(D_cur[:, -2:], dim=-1) / 13 + loss = fsa_distance.mean(0) + + return loss + + +def consist_step_mining(seq_features1, seq_features2, step_num): + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num - 1 + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k >= 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + repeat_times1 = (segment1[:, 1:] - segment1[:, :-1]).flatten() + repeat_target1 = torch.arange(step_num, device=device).repeat((B, )) + step_index1 = repeat_target1.repeat_interleave(repeat_times1).reshape(B, T) + + repeat_times2 = (segment2[:, 1:] - segment2[:, :-1]).flatten() + repeat_target2 = torch.arange(step_num, device=device).repeat((B, )) + step_index2 = repeat_target2.repeat_interleave(repeat_times2).reshape(B, T) + + div_term = torch.exp(torch.arange(0, C, 2, device=device) * -(math.log(10000.0) / C)) + + pos_emb1 = torch.zeros(B, T, C, device=device) + pos_emb1[:, :, 0::2] = torch.sin(step_index1.unsqueeze(-1) * div_term) + pos_emb1[:, :, 1::2] = torch.cos(step_index1.unsqueeze(-1) * div_term) + + pos_emb2 = torch.zeros(B, T, C, device=device) + pos_emb2[:, :, 0::2] = torch.sin(step_index2.unsqueeze(-1) * div_term) + pos_emb2[:, :, 1::2] = torch.cos(step_index2.unsqueeze(-1) * div_term) + + return pos_emb1, pos_emb2, segment1[:, :-1]+1, segment2[:, :-1]+1 + + + +def consist_step_mining_train(seq_features1, seq_features2, step_num, pair_labels): + # seq_features1 = seq_features1[:, 1:] + # seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / 
torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1) + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = tmp.flatten(3).max(-1).values + D_ind[:, k] = tmp.flatten(3).max(-1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + final_result = D[:, :, T-1, T-1] + + video_seg1 = segment1[:, :-1] + 1 + video_seg2 = segment2[:, :-1] + 1 + + # loss_step = (-(pair_labels * final_result.max(dim=-1).values)).sum() + loss_step = -(pair_labels * final_result.max(dim=-1).values).mean() + + return loss_step, video_seg1, video_seg2 + + + +def consist_step_mining_inference(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + # pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + return segment1[:, :-1] + 1, segment2[:, :-1] + 1 + + +def step_align_loss(seq_features1, seq_features2): + B, T, C = seq_features1.shape + # the similarity matrix: 16 * 16 + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = 
torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=seq_features1.device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=pred.device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + i, j, a, b = torch.meshgrid(*[torch.arange(T, device=seq_features1.device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((a >= i) | (b >= j)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + # tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + final_result = D[:, :, T-1, T-1] + return -(final_result.max(dim=-1).values).mean(), final_result.max(dim=-1).indices, D_ind + + +def single_align_loss(seq_features1, seq_features2): + device = seq_features1.device + T, C = seq_features1.shape + pred = (torch.einsum('ic,jc->ij', seq_features1, seq_features2) / math.sqrt(C)).log_softmax(-1) + + ZERO_PAD = torch.zeros((1), device=device) + ONE_PAD = torch.ones((1), device=device) + S = seq_features2.shape[0] + + target = (torch.arange(S, device=device)) + + D_TABLE = ONE_PAD.log() + for t in range(T): + D_VEC_1 = torch.logsumexp(torch.stack([D_TABLE[1:t+1], D_TABLE[:-1][:t]]), 0) + pred[t, target[:t]] + D_VEC_2 = D_TABLE[t:t+1] + pred[t, target[t:t+1]] + D_TABLE = torch.cat([ZERO_PAD.log(), D_VEC_1, D_VEC_2], dim=-1) + # changed by hotel: remove " / s" + ctc_distance = -D_TABLE[S] + return ctc_distance + + +def frame2varstep_loss(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1).mean(-1) + + +def frame2varstep_dist(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) + + +def frame2learnedstep_dist(frame_feats1, step_feats2): + B, T, C = frame_feats1.shape + losses = [] + for batch in range(B): + frame_feat1 = frame_feats1[batch] + step_feat2 = step_feats2[batch] + # step_feat2 = step_feat2[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]] + frame_loss = single_align_loss(frame_feat1, step_feat2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) diff --git a/anet_clip/backup/pdvc/dp/__init__.py b/anet_clip/backup/pdvc/dp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc 
b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd28dccf2f11d713b40d4e237cb5a055bf54ca5d Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1197f07fc41ae6f41b581ebd13f30b674234acf4 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24490f9a2f7cc151dc46f67b4d4ae214dba5c47a Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b35d663b4275176bf9f37c5dff954afd66df0e6 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fe93fd162f629560d23a2791ff3dab2c276d70c Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f2ae8f9d246202b485f89aa690174225dc2e66e Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7d1ad496851d504c4b5de3cabed3465262cf89 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af2c6ca1bfc47fc34f69aaeee119c1c439fdea4b Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..214dc29706641783b09e447117f540f723ec6868 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d84ee83c249b2c327db4180485c62581e0bcb345 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ae95ac7acddab327941068e44fcc974789c6d059
Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc differ
diff --git a/anet_clip/backup/pdvc/dp/dp_utils.py b/anet_clip/backup/pdvc/dp/dp_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1dcdb6e6cb0385b1862aff36c779cdda89cf563
--- /dev/null
+++ b/anet_clip/backup/pdvc/dp/dp_utils.py
@@ -0,0 +1,402 @@
+import numpy as np
+import torch
+import math
+
+from itertools import product
+from torch import log, exp
+import torch.nn.functional as F
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+def compute_all_costs(
+    z_features,
+    x_features,
+    gamma_xz,
+    drop_cost_type,
+    keep_percentile,
+    l2_normalize=False,
+    given_baseline_logits=None,
+    return_baseline=False,
+):
+    """Computes pairwise match costs and per-clip drop costs used in Drop-DTW.
+
+    Parameters
+    ----------
+    z_features: torch.Tensor [K, d]
+        step (e.g., sentence) features
+    x_features: torch.Tensor [N, d]
+        video clip features
+    gamma_xz: float
+        softmax temperature for turning similarities into probabilities
+    drop_cost_type: str
+        the type of drop cost definition, e.g., learnable or logits percentile
+    keep_percentile: float in [0, 1]
+        if drop_cost_type == 'logit', defines the drop (keep) cost threshold as a logits percentile
+    l2_normalize: bool
+        whether to normalize clip and step features before computing the costs
+    """
+
+    if l2_normalize:
+        x_features = F.normalize(x_features, p=2, dim=1)
+        z_features = F.normalize(z_features, p=2, dim=1)
+
+    sim = z_features @ x_features.T
+
+    if drop_cost_type == "logit":
+        if keep_percentile > 1:
+            baseline_logit = sim.min().detach() - 1
+        else:
+            k = max([1, int(torch.numel(sim) * keep_percentile)])
+            baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+        baseline_logits = baseline_logit.repeat([1, sim.shape[1]])  # making it of shape [1, N]
+        sims_ext = torch.cat([sim, baseline_logits], dim=0)
+    else:
+        assert False, f"No such drop mode {drop_cost_type}"
+
+    softmax_sims = torch.nn.functional.softmax(sims_ext / gamma_xz, dim=0)
+    matching_probs, drop_probs = softmax_sims[:-1], softmax_sims[-1]
+    zx_costs = -torch.log(matching_probs + 1e-5)
+    drop_costs = -torch.log(drop_probs + 1e-5)
+    return zx_costs, drop_costs, drop_probs
+
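A quick usage sketch of `compute_all_costs` (toy tensors, values made up): the similarity matrix is extended with one baseline row pinned at the keep-percentile logit, and a column-wise softmax turns it into match and drop probabilities, so clips whose best step similarity falls below the baseline become cheap to drop.

```python
import torch

torch.manual_seed(0)
z = torch.randn(4, 32)     # K=4 step features (toy)
x = torch.randn(12, 32)    # N=12 clip features (toy)
zx_costs, drop_costs, drop_probs = compute_all_costs(
    z, x, gamma_xz=10.0, drop_cost_type="logit", keep_percentile=0.3, l2_normalize=True
)
print(zx_costs.shape, drop_costs.shape)    # torch.Size([4, 12]) torch.Size([12])
```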
+def compute_double_costs(
+    z_features,
+    x_features,
+    gamma_xz,
+    drop_cost_type,
+    keep_percentile,
+    l2_normalize=False,
+    return_baseline=False,
+):
+    """Computes pairwise match costs and drop costs for both sequences, used in double Drop-DTW.
+
+    Parameters
+    ----------
+    z_features: torch.Tensor [K, d]
+        step (e.g., sentence) features
+    x_features: torch.Tensor [N, d]
+        video clip features
+    gamma_xz: float
+        softmax temperature for turning similarities into probabilities
+    drop_cost_type: str
+        the type of drop cost definition, e.g., learnable or logits percentile
+    keep_percentile: float in [0, 1]
+        if drop_cost_type == 'logit', defines the drop (keep) cost threshold as a logits percentile
+    l2_normalize: bool
+        whether to normalize clip and step features before computing the costs
+    """
+
+    z_features, frame_features = z_features, x_features
+    if l2_normalize:
+        x_features = F.normalize(frame_features, p=2, dim=1)
+        z_features = F.normalize(z_features, p=2, dim=1)
+    sim = z_features @ x_features.T
+
+    if drop_cost_type == "logit":
+        k = max([1, int(torch.numel(sim) * keep_percentile)])
+        baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    else:
+        assert False, f"No such drop mode {drop_cost_type}"
+    sim_ext = F.pad(sim, (0, 1, 0, 1), value=baseline_logit)
+
+    softmax_sims = torch.nn.functional.softmax(sim_ext.reshape(-1) / gamma_xz, dim=0).reshape(sim_ext.shape)
+    matching_probs, x_drop_probs, z_drop_probs = softmax_sims[:-1, :-1], softmax_sims[-1, :-1], softmax_sims[:-1, -1]
+    zx_costs = -torch.log(matching_probs + 1e-5)
+    x_drop_costs = -torch.log(x_drop_probs + 1e-5)
+    z_drop_costs = -torch.log(z_drop_probs + 1e-5)
+    return zx_costs, x_drop_costs, z_drop_costs
+
+
+class VarTable:
+    def __init__(self, dims, dtype=torch.float, device=device):
+        self.dims = dims
+        d1, d2, d_rest = dims[0], dims[1], dims[2:]
+
+        self.vars = []
+        for i in range(d1):
+            self.vars.append([])
+            for j in range(d2):
+                var = torch.zeros(d_rest).to(dtype).to(device)
+                self.vars[i].append(var)
+
+    def __getitem__(self, pos):
+        i, j = pos
+        return self.vars[i][j]
+
+    def __setitem__(self, pos, new_val):
+        i, j = pos
+        if self.vars[i][j].sum() != 0:
+            assert False, "This cell has already been assigned. There must be a bug somewhere."
+        else:
+            self.vars[i][j] = self.vars[i][j] + new_val
+
+    def show(self):
+        device, dtype = self[0, 0].device, self[0, 0].dtype
+        mat = torch.zeros(self.dims, dtype=dtype, device=device)
+        for dims in product(*[range(d) for d in self.dims]):
+            i, j, rest = dims[0], dims[1], dims[2:]
+            mat[dims] = self[i, j][rest]
+        return mat
+
+
+def minGamma(inputs, gamma=1, keepdim=True):
+    """continuous relaxation of min defined in the D3TW paper"""
+    if type(inputs) == list:
+        if inputs[0].shape[0] == 1:
+            inputs = torch.cat(inputs)
+        else:
+            inputs = torch.stack(inputs, dim=0)
+
+    if gamma == 0:
+        minG = inputs.min(dim=0, keepdim=keepdim)
+    else:
+        # log-sum-exp stabilization trick
+        zi = -inputs / gamma
+        max_zi = zi.max()
+        log_sum_G = max_zi + log(exp(zi - max_zi).sum(dim=0, keepdim=keepdim) + 1e-5)
+        minG = -gamma * log_sum_G
+    return minG
+
+
+def minProb(inputs, gamma=1, keepdim=True):
+    if type(inputs) == list:
+        if inputs[0].shape[0] == 1:
+            inputs = torch.cat(inputs)
+        else:
+            inputs = torch.stack(inputs, dim=0)
+
+    if gamma == 0:
+        minP = inputs.min(dim=0, keepdim=keepdim)
+    else:
+        probs = F.softmax(-inputs / gamma, dim=0)
+        minP = (probs * inputs).sum(dim=0, keepdim=keepdim)
+    return minP
+
+
+def prob_min(values, gamma_min, logits=None):
+    logits = values if logits is None else logits
+    assert len(logits) == len(values), "Values and prob logits are of different length"
+
+    if len(values) > 1:
+        values = torch.cat(values, dim=-1)
+        logits = torch.cat(logits, dim=-1)
+    else:
+        values = values[0]
+        logits = logits[0]
+
+    if gamma_min > 0:
+        probs = F.softmax(-logits / gamma_min, dim=-1)
+    else:
+        probs = F.one_hot(logits.argmin(), logits.size(-1))
+
+    if values.dim() > probs.dim():
+        probs = probs[..., None, :]
+
+    out = (values * probs).sum(-1).to(values.dtype)
+    return out
+
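`minGamma` is the usual soft-min, min_gamma(a) = -gamma * log sum_i exp(-a_i / gamma), which approaches the hard minimum from below as gamma goes to 0. A short sanity check with toy values:

```python
import torch

a = torch.tensor([3.0, 1.0, 2.0])
for gamma in (1.0, 0.1, 0.01):
    soft_min = -gamma * torch.logsumexp(-a / gamma, dim=0)
    print(gamma, soft_min.item())   # 0.5924, then ~1.0000 -> approaches min(a) = 1.0
```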
+def list_min(values, keys=None):
+    keys = values if keys is None else keys
+    assert len(keys) == len(values), "Values and prob logits are of different length"
+
+    if values[0].dim() == keys[0].dim() + 1:
+        dim = -2
+    else:
+        dim = -1
+
+    if len(values) > 1:
+        values = torch.cat(values, dim=dim)
+        keys = torch.cat(keys, dim=-1)
+    else:
+        values = values[0]
+        keys = keys[0]
+
+    onehot = F.one_hot(keys.argmin(-1), keys.size(-1))
+    if values.dim() > keys.dim():
+        onehot = onehot[..., None]
+    out = (values * onehot).sum(dim).to(values.dtype)
+    return out
+
+
+def traceback(D):
+    i, j = np.array(D.shape) - 2
+    p, q = [i], [j]
+    while (i > 0) or (j > 0):
+        tb = np.argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
+        if tb == 0:
+            i -= 1
+            j -= 1
+        elif tb == 1:
+            i -= 1
+        else:  # (tb == 2):
+            j -= 1
+        p.insert(0, i)
+        q.insert(0, j)
+    return np.array(p), np.array(q)
+
+
+def diag_to_mat(diags, K, N):
+    mat = np.zeros([K, N]) - 123
+    for d in range(len(diags)):
+        for r, v in enumerate(diags[d]):
+            j = min(d, N - 1) - r
+            i = d - j
+            mat[i, j] = v if v < 1e8 else np.inf
+    return mat
+
+
+def pad_costs(zx_costs_list, drop_costs_list):
+    B = len(zx_costs_list)
+    Ns, Ks = [], []
+    for i in range(B):
+        Ki, Ni = zx_costs_list[i].shape
+        if Ki >= Ni:
+            # in case the number of steps is greater than the number of frames,
+            # duplicate every frame and let the drops do the job.
+            mult = math.ceil(Ki / Ni)
+            zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1])
+            drop_costs_list[i] = torch.stack([drop_costs_list[i]] * mult, dim=-1).reshape([-1])
+            Ni *= mult
+        Ns.append(Ni)
+        Ks.append(Ki)
+    N, K = max(Ns), max(Ks)
+
+    # preparing padded tables
+    padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], []
+    for i in range(B):
+        zx_costs = zx_costs_list[i]
+        drop_costs = drop_costs_list[i]
+        cum_drop_costs = torch.cumsum(drop_costs, dim=0)
+
+        # padding everything to the size of the largest N and K
+        row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device)
+        padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad]))
+        padded_drop_costs.append(torch.cat([drop_costs, row_pad]))
+        multirow_pad = torch.stack([row_pad + 9999999999] * Ks[i], dim=0)
+        padded_table = torch.cat([zx_costs, multirow_pad], dim=1)
+        rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + 9999999999
+        padded_table = torch.cat([padded_table, rest_pad], dim=0)
+        padded_zx_costs.append(padded_table)
+    return padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks
+
+
+def get_diag_coord_grid(B, d_len, num_states, d_idx):
+    """
+    B - batch size
+    d_len - number of elements in the diagonal
+    num_states - number of states in the DP table
+    d_idx - index of the diagonal, used for marking
+    """
+    r = torch.arange(d_len)
+    s = torch.arange(num_states)
+    d = torch.ones(d_len, num_states) * d_idx
+    mg = torch.stack([d, *torch.meshgrid(r, s)], dim=-1)[None, ...].repeat([B, 1, 1, 1])
+    return mg
+
+
+def diag_traceback(pointer, N, paths):
+    # getting rid of unnecessary elements in the batch
+    pointer = [int(l.item()) for l in pointer]
+    d, r, s = pointer
+    traceback = [pointer]
+    while d > 0:
+        new_pointer = [int(l.item()) for l in paths[d][r, s]]
+        traceback.append(new_pointer)
+        d, r, s = new_pointer
+
+    # transform to rectangular coordinates
+    rectangular_traceback = []
+    for d, r, s in traceback:
+        i = r + max(0, d - N + 1)
+        j = d - i
+        if i > 0 and j > 0:
+            rectangular_traceback.append((i, j, s))
+
+    return traceback, rectangular_traceback
+
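The batched machines later in `exact_dp.py` sweep their DP tables by anti-diagonals so that every cell on one diagonal can be updated in parallel; `diag_to_mat`, `diag_traceback`, and `get_diag_coord_grid` all rely on the same (d, r) to (i, j) convention. A small illustration of that mapping (my own enumeration, mirroring `j = min(d, N - 1) - r` above):

```python
K, N = 3, 4
for d in range(K + N - 1):                 # anti-diagonals of a K x N table, d = i + j
    cells = []
    for r in range(min(d, N - 1) + 1):     # r = position along the diagonal
        j = min(d, N - 1) - r
        i = d - j
        if i < K:
            cells.append((i, j))
    print(d, cells)
# d=0 -> [(0, 0)], d=1 -> [(0, 1), (1, 0)], ..., d=5 -> [(2, 3)]
```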
+def nw_diag_traceback(d, r, N, paths):
+    d, r = int(d.item()), int(r.item())
+    traceback = []
+    while d > 0:
+        d_1, s_1, s = [int(l.item()) for l in paths[d][r, 0]]
+        traceback.append((d, r, s))
+        d, r = d_1, s_1
+
+    # transform to rectangular coordinates
+    rectangular_traceback = []
+    for d, r, s in traceback:
+        i = r + max(0, d - N + 1)
+        j = d - i
+        if i > 0 and j > 0:
+            rectangular_traceback.append((i, j, s))
+
+    return traceback, rectangular_traceback
+
+
+def compute_symmetric_cost(sim, keep_percentile=0.3):
+    k = max([1, int(torch.numel(sim) * keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])  # making it of shape [1, N]
+    zx_costs = -sim
+    x_drop_costs = -baseline_logits.squeeze()
+    z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze()
+    return zx_costs, x_drop_costs, z_drop_costs
+
+
+#============ Hack from model_utils.py in StepFormer ============#
+
+
+def unique_softmax(sim, labels, gamma=1, dim=0):
+    assert sim.shape[0] == labels.shape[0]
+    labels = labels.detach().cpu().numpy()
+    _, unique_index, unique_inverse_index = np.unique(labels, return_index=True, return_inverse=True)
+    unique_sim = sim[unique_index]
+    unique_softmax_sim = torch.nn.functional.softmax(unique_sim / gamma, dim=dim)
+    softmax_sim = unique_softmax_sim[unique_inverse_index]
+    return softmax_sim
+
+
+def compute_masked_sims(z, x, z_pad_mask, x_pad_mask, l2_normalize=False, softmax_dim=None, gamma=None):
+    # z ~ [B, K, d], x ~ [B, N, d]
+    if l2_normalize:
+        z, x = F.normalize(z, dim=-1), F.normalize(x, dim=-1)
+    pad_sims = torch.einsum("bkd,bnd->bkn", z, x)
+    masked_sims = []
+    for i in range(x.shape[0]):
+        masked_sim = pad_sims[i]
+        masked_sim = masked_sim if z_pad_mask is None else masked_sim[~z_pad_mask[i], :]
+        masked_sim = masked_sim if x_pad_mask is None else masked_sim[:, ~x_pad_mask[i]]
+        if softmax_dim is not None:
+            masked_sim = F.softmax(masked_sim / gamma, dim=softmax_dim)
+        masked_sims.append(masked_sim)
+    return masked_sims
+
+
+def compute_sim(z, x, l2_norm):
+    if l2_norm:
+        return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T
+    else:
+        return z @ x.T
+
+
+def cosine_sim(x, z):
+    cos_sim_fn = torch.nn.CosineSimilarity(dim=1)
+    return cos_sim_fn(x[..., None], z.T[None, ...])
+
+
+def cos_dist(x, z):
+    cos_sim_fn = torch.nn.CosineSimilarity(dim=1)
+    return (1 - cos_sim_fn(x[..., None], z.T[None, ...])) / 2
+
+
+def l2_dist(x, z):
+    # per-row squared norms, so the result is the full pairwise distance matrix
+    dist_squared = (x**2).sum(-1, keepdim=True) + (z**2).sum(-1)[None, :] - 2 * x @ z.T
+    return torch.clamp(dist_squared, min=0).sqrt()
+
+
+def cos_loglikelihood(x, z, gamma=0.1, z_dim=1):
+    cos_sim = cosine_sim(x, z)
+    probs = F.softmax(cos_sim / gamma, dim=z_dim)
+    return torch.log(probs)
\ No newline at end of file
diff --git a/anet_clip/backup/pdvc/dp/exact_dp.py b/anet_clip/backup/pdvc/dp/exact_dp.py
new file mode 100644
index 0000000000000000000000000000000000000000..ada874b89a60799af867aab82357c8d7b442348d
--- /dev/null
+++ b/anet_clip/backup/pdvc/dp/exact_dp.py
@@ -0,0 +1,1123 @@
+import torch
+import numpy as np
+import torch.nn.functional as F
+from functools import partial
+from copy import copy
+
+# from dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min
+from pdvc.dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min
+
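`crosstask_dp` below runs on an extended label sequence of length 2K+1 that interleaves a background state before, between, and after the K steps, much like the blank symbol in CTC; its inner `get_step` maps an extended index back to a step id (0 meaning background). A tiny illustration with a toy K:

```python
# K = 3 steps -> extended states [bg, s1, bg, s2, bg, s3, bg]
def get_step(k):
    return 0 if k % 2 == 0 else int((k + 1) / 2)

print([get_step(k) for k in range(2 * 3 + 1)])   # [0, 1, 0, 2, 0, 3, 0]
```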
+def crosstask_dp(cost_matrix, exactly_one=True, bg_cost=0):
+    "Algorithm used in Cross-Task to calculate Recall"
+
+    def get_step(k):
+        return 0 if k % 2 == 0 else int((k + 1) / 2)
+
+    T = cost_matrix.shape[0]
+    K = cost_matrix.shape[1]
+    K_ext = int(2 * K + 1)
+
+    L = -np.ones([T + 1, K_ext], dtype=float)
+    P = -np.ones([T + 1, K_ext], dtype=float)
+    L[0, 0] = 0
+    P[0, 0] = 0
+
+    for t in range(1, T + 1):
+        Lt = L[t - 1, :]
+        Pt = P[t - 1, :]
+        for k in range(K_ext):
+            s = get_step(k)
+            opt_label = -1
+
+            j = k
+            if (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1 and (s == 0 or not exactly_one):
+                opt_label = j
+                opt_value = Lt[j]
+
+            j = k - 1
+            if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1:
+                opt_label = j
+                opt_value = L[t - 1][j]
+
+            if s != 0:
+                j = k - 2
+                if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1:
+                    opt_label = j
+                    opt_value = Lt[j]
+
+            if s != 0:
+                L[t, k] = opt_value + cost_matrix[t - 1][s - 1]
+            else:
+                L[t, k] = opt_value + bg_cost
+            P[t, k] = opt_label
+
+    labels = np.zeros_like(cost_matrix)
+    if L[T, K_ext - 1] < L[T, K_ext - 2] or (P[T, K_ext - 2] == -1):
+        k = K_ext - 1
+    else:
+        k = K_ext - 2
+    for t in range(T, 0, -1):
+        s = get_step(k)
+        if s > 0:
+            labels[t - 1, s - 1] = 1
+        k = P[t, k].astype(int)
+    return labels
+
+
+def iou_based_matching(pred_seg, gt_seg, pred_step_ids, gt_step_ids, ignore_class=True):
+    """Performs the matching of predicted and gt sequence segments"""
+    pred_segments = torch.stack([pred_seg == idx for idx in pred_step_ids], 0)  # [N_pred, T]
+    gt_segments = torch.stack([gt_seg == idx for idx in gt_step_ids], 0)  # [N_gt, T]
+    intersection = (
+        torch.logical_and(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1)
+    )  # [N_pred, N_gt]
+    union = torch.logical_or(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1)  # [N_pred, N_gt]
+    iou = intersection / (union + 1e-5)  # [N_pred, N_gt]
+
+    C = -iou.detach().cpu().numpy().T  # [N_gt, N_pred]
+    if not ignore_class:
+        print("Not ignoring class")
+        is_same_step_id = pred_step_ids.unsqueeze(1) == gt_step_ids.unsqueeze(0)  # [N_pred, N_gt]
+        if is_same_step_id.shape == (1, 1):
+            C[0, 0] += 9999 * (~is_same_step_id[0, 0])
+        else:
+            C[~is_same_step_id] = 9999
+
+    x_drop, z_drop = np.zeros(C.shape[1]), np.zeros(C.shape[0])
+    labels = double_drop_dtw(C, x_drop, z_drop, one_to_many=False, many_to_one=False, return_labels=True) - 1
+    indices = (np.arange(len(labels))[labels > -1], labels[labels > -1])
+    return [torch.as_tensor(i, dtype=torch.int64) for i in indices]
+
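The next function, `drop_dtw`, keeps two values per DP cell: state 0 (clip matched to the current step) and state 1 (clip dropped at its per-clip cost). A toy instance makes the intended optimum concrete (my own numbers, not from the diff):

```python
import numpy as np

# One step, four clips: clips 1 and 2 fit the step, clips 0 and 3 are off-topic.
zx = np.array([[2.0, 0.1, 0.2, 3.0]])    # match costs, shape [K=1, N=4]
drop = np.array([0.5, 0.5, 0.5, 0.5])    # per-clip drop costs
# Optimal alignment: drop clip 0, match the contiguous run {1, 2}, drop clip 3,
# for a total cost of 0.5 + 0.1 + 0.2 + 0.5 = 1.3.
```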
+def drop_dtw(zx_costs, drop_costs, exclusive=True, contiguous=True, one_to_one=False, return_labels=False):
+    """Drop-DTW algorithm that allows drops only from one (video) side. See Algorithm 1 in the paper.
+
+    Parameters
+    ----------
+    zx_costs: np.ndarray [K, N]
+        pairwise match costs between K steps and N video clips
+    drop_costs: np.ndarray [N]
+        drop costs for each clip
+    exclusive: bool
+        if True, any clip can be matched with only one step, not many
+    contiguous: bool
+        if True, can only match a contiguous sequence of clips to a step
+        (i.e. no drops in between the clips)
+    return_labels: bool
+        if True, returns output directly useful for segmentation computation (made for convenience)
+    """
+    K, N = zx_costs.shape
+
+    # D: the dynamic programming table, which records the intermediate costs
+    # P: the path tracking table, which records the previous location and state (zi, xi, prev_state)
+
+    # initialize solution matrices
+    D = np.zeros([K + 1, N + 1, 2])  # the last dimension corresponds to the two states:
+    # state 0 - x is matched; state 1 - x is dropped
+    D[1:, 0, :] = np.inf  # no drops in z in any state
+    D[0, 1:, 0] = np.inf  # no drops in x in state 0, i.e. state where x is matched
+    D[0, 1:, 1] = np.cumsum(drop_costs)  # drop costs initialization in state 1
+
+    # initialize path tracking info for each state
+    P = np.zeros([K + 1, N + 1, 2, 3], dtype=int)  # the last dimension records the previous location and state (zi, xi, prev_state)
+    for xi in range(1, N + 1):
+        P[0, xi, 1] = 0, xi - 1, 1
+    # filling in the dynamic tables
+    for zi in range(1, K + 1):
+        for xi in range(1, N + 1):
+            # define frequently met neighbors here
+            diag_neigh_states = [0, 1]
+            diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states]
+            diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states]
+
+            left_neigh_states = [0, 1]
+            left_neigh_coords = [(zi, xi - 1) for _ in left_neigh_states]
+            left_neigh_costs = [D[zi, xi - 1, s] for s in left_neigh_states]
+
+            left_pos_neigh_states = [0] if contiguous else left_neigh_states
+            left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states]
+            left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states]  # with contiguous=True, a drop between the clips matched to one step is not allowed (one step cannot cover sparse clips)
+
+            top_pos_neigh_states = [0]
+            top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states]
+            top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states]
+
+            z_cost_ind, x_cost_ind = zi - 1, xi - 1  # indexing in costs is shifted by 1
+
+            # state 0: matching x to z
+            neigh_states_pos = diag_neigh_states
+            neigh_coords_pos = diag_neigh_coords
+            neigh_costs_pos = diag_neigh_costs
+            if not one_to_one:
+                neigh_states_pos = neigh_states_pos + left_pos_neigh_states
+                neigh_coords_pos = neigh_coords_pos + left_pos_neigh_coords
+                neigh_costs_pos = neigh_costs_pos + left_pos_neigh_costs
+            if not exclusive:  # exclusive=True means any clip can be matched with only one step, i.e., a path from the top is not allowed
+                neigh_states_pos = neigh_states_pos + top_pos_neigh_states
+                neigh_coords_pos = neigh_coords_pos + top_pos_neigh_coords
+                neigh_costs_pos = neigh_costs_pos + top_pos_neigh_costs
+
+            costs_pos = np.array(neigh_costs_pos) + zx_costs[z_cost_ind, x_cost_ind]  # calculate cumulative cost in current step
+            opt_ind_pos = np.argmin(costs_pos)
+            P[zi, xi, 0] = *neigh_coords_pos[opt_ind_pos], neigh_states_pos[opt_ind_pos]  # records the previous position (zi, xi) and state (0 or 1)
+            D[zi, xi, 0] = costs_pos[opt_ind_pos]  # update the minimal cumulative cost of the selected path
+
+            # state 1: x is dropped
+            costs_neg = np.array(left_neigh_costs) + drop_costs[x_cost_ind]
+            opt_ind_neg = np.argmin(costs_neg)
+            P[zi, xi, 1] = *left_neigh_coords[opt_ind_neg], left_neigh_states[opt_ind_neg]
+            D[zi, xi, 1] = costs_neg[opt_ind_neg]
+
+    cur_state = D[K, N, :].argmin()
+    min_cost = D[K, N, cur_state]
+
+    # backtracking the solution
+    zi, xi = K, N
+    path, labels = [], np.zeros(N)
+    x_dropped = [N] if cur_state == 1 else []
+    while not (zi == 0 and xi == 0):
+        path.append((zi, xi))
+        zi_prev, xi_prev, prev_state = P[zi, xi, cur_state]
+        if xi > 0:
+            labels[xi - 1] = zi * (cur_state == 0)  # either zi or 0
+        if prev_state == 1:
+            x_dropped.append(xi_prev)
+        zi, xi, cur_state = zi_prev, xi_prev, prev_state
+
+    if not return_labels:
+        return min_cost, D, path, x_dropped
+    else:
+        return labels
+
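Continuing the toy instance from above, the expected labeling (1-indexed step ids, 0 meaning dropped):

```python
labels = drop_dtw(zx, drop, return_labels=True)
print(labels)   # [0. 1. 1. 0.], with a minimum cost of 1.3
```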
+def double_drop_dtw(
+    pairwise_zx_costs,
+    x_drop_costs,
+    z_drop_costs,
+    contiguous=True,
+    one_to_many=True,
+    many_to_one=True,
+    return_labels=False,
+):
+    """Drop-DTW algorithm that allows drops from both sequences. See Algorithm 1 in Appendix.
+
+    Parameters
+    ----------
+    pairwise_zx_costs: np.ndarray [K, N]
+        pairwise match costs between K steps and N video clips
+    x_drop_costs: np.ndarray [N]
+        drop costs for each clip
+    z_drop_costs: np.ndarray [K]
+        drop costs for each step
+    contiguous: bool
+        if True, can only match a contiguous sequence of clips to a step
+        (i.e. no drops in between the clips)
+    """
+    K, N = pairwise_zx_costs.shape
+
+    # initialize solution matrices
+    D = np.zeros([K + 1, N + 1, 4])  # the 4 dimensions are the following states: zx, z-, -x, --
+    # no drops allowed in zx DP. Setting the same for all DPs to change later here.
+    D[1:, 0, :] = 99999999
+    D[0, 1:, :] = 99999999
+    D[0, 0, 1:] = 99999999
+    # Allow to drop x in z- and --
+    D[0, 1:, 1], D[0, 1:, 3] = np.cumsum(x_drop_costs), np.cumsum(x_drop_costs)
+    # Allow to drop z in -x and --
+    D[1:, 0, 2], D[1:, 0, 3] = np.cumsum(z_drop_costs), np.cumsum(z_drop_costs)
+
+    # initialize path tracking info for each of the 4 DP tables:
+    P = np.zeros([K + 1, N + 1, 4, 3], dtype=int)  # (zi, xi, prev_state)
+    for zi in range(1, K + 1):
+        P[zi, 0, 2], P[zi, 0, 3] = (zi - 1, 0, 2), (zi - 1, 0, 3)
+    for xi in range(1, N + 1):
+        P[0, xi, 1], P[0, xi, 3] = (0, xi - 1, 1), (0, xi - 1, 3)
+
+    # filling in the dynamic tables
+    for zi in range(1, K + 1):
+        for xi in range(1, N + 1):
+            # define frequently met neighbors here
+            diag_neigh_states = [0, 1, 2, 3]  # zx, z-, -x, --
+            diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states]
+            diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states]
+
+            left_pos_neigh_states = [0, 1]  # zx and z-
+            left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states]
+            left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states]
+
+            top_pos_neigh_states = [0, 2]  # zx and -x
+            top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states]
+            top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states]
+
+            left_neg_neigh_states = [2, 3]  # -x and --
+            left_neg_neigh_coords = [(zi, xi - 1) for _ in left_neg_neigh_states]
+            left_neg_neigh_costs = [D[zi, xi - 1, s] for s in left_neg_neigh_states]
+
+            top_neg_neigh_states = [1, 3]  # z- and --
+            top_neg_neigh_coords = [(zi - 1, xi) for _ in top_neg_neigh_states]
+            top_neg_neigh_costs = [D[zi - 1, xi, s] for s in top_neg_neigh_states]
+
+            z_cost_ind, x_cost_ind = zi - 1, xi - 1  # indexing in costs is shifted by 1
+
+            # DP 0: coming to zx
+            neigh_states_zx = diag_neigh_states
+            neigh_coords_zx = diag_neigh_coords
+            neigh_costs_zx = diag_neigh_costs
+            if one_to_many:
+                if contiguous:
+                    neigh_states_zx.extend(left_pos_neigh_states[0:1])
+                    neigh_coords_zx.extend(left_pos_neigh_coords[0:1])
+                    neigh_costs_zx.extend(left_pos_neigh_costs[0:1])
+                else:
+                    neigh_states_zx.extend(left_pos_neigh_states)
+                    neigh_coords_zx.extend(left_pos_neigh_coords)
+                    neigh_costs_zx.extend(left_pos_neigh_costs)
+            if many_to_one:
+                neigh_states_zx.extend(top_pos_neigh_states)
+                neigh_coords_zx.extend(top_pos_neigh_coords)
+                neigh_costs_zx.extend(top_pos_neigh_costs)
+
+            costs_zx = np.array(neigh_costs_zx) + pairwise_zx_costs[z_cost_ind, x_cost_ind]
+            opt_ind_zx = np.argmin(costs_zx)
+            P[zi, xi, 0] = *neigh_coords_zx[opt_ind_zx], neigh_states_zx[opt_ind_zx]
+            D[zi, xi, 0] = costs_zx[opt_ind_zx]
+
+            # DP 1: coming to z-
+            neigh_states_z_ = left_pos_neigh_states
+            neigh_coords_z_ = left_pos_neigh_coords
+            neigh_costs_z_ = left_pos_neigh_costs
+            costs_z_ = np.array(neigh_costs_z_) + x_drop_costs[x_cost_ind]
+            opt_ind_z_ = np.argmin(costs_z_)
+            P[zi, xi, 1] =
*neigh_coords_z_[opt_ind_z_], neigh_states_z_[opt_ind_z_] + D[zi, xi, 1] = costs_z_[opt_ind_z_] + + # DP 2: coming to -x + neigh_states__x = top_pos_neigh_states + neigh_coords__x = top_pos_neigh_coords + neigh_costs__x = top_pos_neigh_costs + costs__x = np.array(neigh_costs__x) + z_drop_costs[z_cost_ind] + opt_ind__x = np.argmin(costs__x) + P[zi, xi, 2] = *neigh_coords__x[opt_ind__x], neigh_states__x[opt_ind__x] + D[zi, xi, 2] = costs__x[opt_ind__x] + + # DP 3: coming to -- + neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states) + # neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states + diag_neigh_states) + # adding negative left and top neighbors + neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords) + # neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords + diag_neigh_coords) + costs___ = np.concatenate( + [ + left_neg_neigh_costs + x_drop_costs[x_cost_ind], + top_neg_neigh_costs + z_drop_costs[z_cost_ind], + # diag_neigh_costs + z_drop_costs[z_cost_ind] + x_drop_costs[x_cost_ind], + ], + 0, + ) + + opt_ind___ = costs___.argmin() + P[zi, xi, 3] = *neigh_coords___[opt_ind___], neigh_states___[opt_ind___] + D[zi, xi, 3] = costs___[opt_ind___] + + cur_state = D[K, N, :].argmin() + min_cost = D[K, N, cur_state] + + # unroll path + path = [] + zi, xi = K, N + x_dropped = [N] if cur_state in [1, 3] else [] + z_dropped = [K] if cur_state in [2, 3] else [] + while not (zi == 0 and xi == 0): + path.append((zi, xi)) + zi_prev, xi_prev, prev_state = P[zi, xi, cur_state] + if prev_state in [1, 3]: + x_dropped.append(xi_prev) + if prev_state in [2, 3]: + z_dropped.append(zi_prev) + zi, xi, cur_state = zi_prev, xi_prev, prev_state + + if return_labels: + labels = np.zeros(N) + for zi, xi in path: + if zi not in z_dropped and xi not in x_dropped: + labels[xi - 1] = zi + return labels + else: + return min_cost, path, x_dropped, z_dropped + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_left_pos, coord_left_neg = coord_left[..., [0, 1], :], coord_left[..., [2, 3], :] + coord_up_pos, coord_up_neg = coord_up[..., [0, 2], :], coord_up[..., [1, 3], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag] + coordinates_zx = [coord_diag] + if one_to_many: + neighbors_zx.append(neigh_left_pos[..., [0]] if contiguous else neigh_left) + coordinates_zx.append(coord_left_pos[..., [0], :] if contiguous else coord_left) + if many_to_one: + neighbors_zx.append(neigh_up_pos) + coordinates_zx.append(coord_up_pos) + diag_zx = list_min(neighbors_zx) + match_costs_diag + path_zx = list_min(coordinates_zx, keys=neighbors_zx) + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + coordinates_z_ = [coord_left_pos] + diag_z_ = list_min(neighbors_z_) + x_drop_costs_diag + path_z_ = list_min(coordinates_z_, keys=neighbors_z_) + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + coordinates__x = [coord_up_pos] + diag__x = list_min(neighbors__x) + z_drop_costs_diag + path__x = list_min(coordinates__x, keys=neighbors__x) + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], 
neigh_up_neg + z_drop_costs_diag[..., None]] + coordinates___ = [coord_left_neg, coord_up_neg] + diag___ = list_min(neighbors___) + path___ = list_min(coordinates___, neighbors___) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + path = torch.stack([path_zx, path_z_, path__x, path___], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
       In this NW variant, however, each cell keeps a single value (the best of
       match / drop-x / drop-z), so this dimension has size 1.
+    """
+    # initialize the first two anti-diagonals
+    batch_inf = torch.stack([inf] * B, 0)
+    diag_pp = torch.zeros([B, 1, 1], device=dev)  # diag at i-2
+    x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]]
+    diag_p_row = x1_dropcost[..., None]
+    diag_p_col = z1_dropcost[..., None]
+    diag_p = torch.cat([diag_p_row, diag_p_col], 1)  # diag at i-1
+
+    # The path table is also in diagonal representation; it carries, for every point,
+    # the backpointer [d, r, s] of its best predecessor
+    path_pp = torch.zeros([B, 1, 1, 3], device=dev, dtype=int)
+    path_p = torch.zeros([B, 2, 1, 3], device=dev, dtype=int)
+    all_paths = [path_pp, path_p]  # stores all the intermediate path diagonals for the backtrack
+
+    # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point
+    # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is the element's order in the diagonal
+    # and s is the state (re-coded below to the incoming direction: 0 diag, 1 left, 2 up)
+    coord_pp = get_diag_coord_grid(B, 1, 1, 0).to(dev)
+    coord_p = get_diag_coord_grid(B, 2, 1, 1).to(dev)
+
+    min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev)  # for storing the solution for each element
+    tracebacks = [None for _ in range(B)]  # stores the recovered alignment for each batch element
+
+    for d in range(K + N - 1):
+        size = diag_p.size(1) - 1
+        pp_start = 0 if d < N else 1
+        neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)]
+
+        coord_up, coord_left, coord_diag = (
+            coord_p[:, :-1].clone(),
+            coord_p[:, 1:].clone(),
+            coord_pp[:, pp_start : (pp_start + size)].clone(),
+        )
+        # assign the right state to coordinates
+        coord_diag[..., 2] = 0
+        coord_left[..., 2] = 1
+        coord_up[..., 2] = 2
+
+        # define match and drop cost vectors
+        match_costs_diag = torch.stack(
+            [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0
+        )
+
+        x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1
+        x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1])
+        z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1
+        z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end]
+
+        # update the table -> compute new diagonal
+
+        # single DP state: best of match (diag), x-drop (left) and z-drop (up)
+        neighbors = [
+            neigh_diag + match_costs_diag[..., None],
+            neigh_left + x_drop_costs_diag[..., None],
+            neigh_up + z_drop_costs_diag[..., None],
+        ]
+        coordinates = [coord_diag, coord_left, coord_up]
+        diag = list_min(neighbors)[..., None]
+        path = (list_min(coordinates, keys=neighbors))[..., None, :]
+
+        # add the initialization values on the ends of the diagonal if needed
+        effective_d = d + 2  # effective count of d is actually d + 2, since we started with 2 diagonals
+        if d < N - 1:
+            # fill in 0th row of the cost matrix with the cumulative x-drop cost
+            x_drop_cost = all_cum_x_drop_costs[:, [d + 1]]
+            cost_pad = x_drop_cost[..., None]
+            diag = torch.cat([cost_pad, diag], dim=1)
+
+            # fill in 0th row of the path matrix with the right pointers
+            left_pointer = torch.stack(
+                [torch.ones(1) * (effective_d - 1), torch.zeros(1), torch.ones(1) * 1], dim=-1
+            )  # [1, 3]
+            left_pointer = (
+                left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype)
+            )  # [B, 1, 1, 3]
+            path = torch.cat([left_pointer, path], 1)
+        if d < K - 1:
+            # fill in 0th col of the cost matrix with the cumulative z-drop cost
+            z_drop_cost = all_cum_z_drop_costs[:, [d + 1]]
+            pad =
z_drop_cost[..., None] + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(1) * (effective_d - 1), torch.ones(1) * (last_r_p - 1), torch.ones(1) * 2], + dim=-1, + ) # [1, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 1, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 1, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + dc, rc, _ = coord_p[b, r][0] + tracebacks[orig_b.item()] = nw_diag_traceback(dc, rc, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_drop_dtw_machine(zx_costs_list, x_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. 
The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + x1_dropcost = all_cum_x_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 2, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 2, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 2, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 2, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_up_pos, coord_left_pos = coord_up[..., [0], :], coord_left[..., [0], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + pos_neighbors = [neigh_diag] + pos_coordinates = [coord_diag] + if one_to_many: + pos_neighbors.append(neigh_left_pos if contiguous else neigh_left) + pos_coordinates.append(coord_left_pos if contiguous else coord_left) + if many_to_one: + pos_neighbors.append(neigh_up) + pos_coordinates.append(coord_up) + diag_pos = list_min(pos_neighbors) + match_costs_diag + path_pos = list_min(pos_coordinates, keys=pos_neighbors) + + neg_neighbors = [neigh_left] + neg_coordinates = [coord_left] + diag_neg = list_min(neg_neighbors) + x_drop_costs_diag + path_neg = list_min(neg_coordinates, keys=neg_neighbors) + + diag = torch.stack([diag_pos, diag_neg], -1) + path = torch.stack([path_pos, path_neg], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(2) 
* (effective_d - 1), torch.zeros(2), torch.arange(2)], dim=-1 + ) # [2, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 2, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + pad = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(2) * (effective_d - 1), torch.ones(2) * (last_r_p - 1), torch.arange(2)], + dim=-1, + ) # [2, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 2, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_cum_x_drop_costs, batch_inf = [ + t[~mask] for t in [all_x_drop_costs, all_cum_x_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def fast_batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + + # create routing masks for selection + # 4x3 corresponds to 4 states (zx, z-, -x, --) and 3 neighbors (l, d, u) + zx_mask = torch.zeros((4, 3)) + zx_mask[:, 1] = 1 + if one_to_many: + zx_mask[0, 0] = 1 + if not contiguous: + zx_mask[1, 0] = 1 + if many_to_one: + zx_mask[[0, 2], 2] = 1 + + z__mask = torch.zeros((4, 3)) + z__mask[[0, 1], 0] = 1 + + _x_mask = torch.zeros((4, 3)) + _x_mask[[0, 2], 2] = 1 + + ___mask = torch.zeros((4, 3)) + ___mask[[2, 3], 0] = 1 + ___mask[[1, 3], 2] = 1 + + mask = torch.stack([zx_mask, z__mask, _x_mask, ___mask], dim=-1).to(dev).to(dtype) # [4, 3, 4] + + def transition( + neigh_left, neigh_diag, neigh_up, coord_left, coord_diag, coord_up, match_costs, x_drop_costs, z_drop_costs + ): + all_neigh = torch.stack([neigh_left, neigh_diag, neigh_up], dim=-1) # [B, d, 4, 3] + all_coords = torch.stack([coord_left, coord_diag, coord_up], dim=-1).permute( + [0, 1, 3, 2, 4] + ) # [B, d, 3, 4, 3], the first 3 is the spatial dimension of coordinates + additions_zx = match_costs[..., None].repeat([1, 1, 3]) # [B, d, 3] + additions_z_ = x_drop_costs[..., None].repeat([1, 1, 3]) + additions__x = z_drop_costs[..., None].repeat([1, 1, 3]) + additions___ = torch.stack([x_drop_costs, match_costs, z_drop_costs], dim=-1) + additions = torch.stack([additions_zx, additions_z_, additions__x, additions___], dim=-1) # [B, d, 3, 4] + + inverse_mask = (~(mask[None, None, ...].to(bool))).to(dtype) + filtered_costs = all_neigh[..., None] * mask[None, None, ...] + inverse_mask * inf[0] # [B, d, 4, 3, 4] + full_costs = filtered_costs + additions[:, :, None, :, :] * mask[None, None, ...] 
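+        # Shape bookkeeping for the masked-min trick above: all_neigh is
+        # [B, d, 4, 3] (4 source states x 3 neighbors: left, diag, up) and
+        # mask is [4, 3, 4] (source state, neighbor, target state), so
+        # full_costs is [B, d, 4, 3, 4]; illegal (source, neighbor) -> target
+        # transitions were overwritten with inf via inverse_mask. The reshape
+        # and min below therefore pick, for every target state, the cheapest
+        # legal predecessor among the 12 (source, neighbor) combinations in a
+        # single vectorized op instead of four separate list_min calls.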
+ B, d = full_costs.shape[:2] + the_min = full_costs.reshape([B, d, -1, 4]).min(dim=2) + new_diag = the_min.values + + all_coords = all_coords[..., None].repeat([1, 1, 1, 1, 1, 4]).reshape([B, d, 3, -1, 4]) + argmins = the_min.indices[:, :, None, None, :].repeat([1, 1, 3, 1, 1]) + pointers = torch.gather(all_coords, index=argmins, dim=-2) + pointers = pointers[:, :, :, 0, :].permute([0, 1, 3, 2]) + return new_diag, pointers + + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + diag, path = transition( + neigh_left, + neigh_diag, + neigh_up, + coord_left, + coord_diag, + coord_up, + match_costs_diag, + x_drop_costs_diag, + z_drop_costs_diag, + ) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = 
torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + local_mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(local_mask, as_tuple=False)[:, 0], Rs[local_mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~local_mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~local_mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~local_mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +if __name__ == '__main__': + zx_costs = np.random.rand(3, 4) # K=3 steps, N=4 clips + # zx_costs = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 1.0]]) + drop_costs = np.random.rand(4) + align = drop_dtw(zx_costs, drop_costs) + #breakpoint() diff --git a/anet_clip/backup/pdvc/dp/soft_dp.py b/anet_clip/backup/pdvc/dp/soft_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..9d5c17e5d5eeff50254dc7b8d31f6d43b253e388 --- /dev/null +++ b/anet_clip/backup/pdvc/dp/soft_dp.py @@ -0,0 +1,617 @@ +import numpy as np +import torch +import math +from torch import log, exp +import torch.nn.functional as F +from copy import copy + +from pdvc.dp.dp_utils import VarTable, minGamma, minProb, pad_costs, prob_min, unique_softmax, cosine_sim + + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def softDTW( + step_features, + frame_features, + labels, + dist_type="inner", + softning="prob", + gamma_min=0.1, + gamma_xz=0.1, + step_normalize=True, +): + """function to obtain a soft (differentiable) version of DTW + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the function + _min_fn = minProb if softning == "prob" else minGamma + min_fn = lambda x: _min_fn(x, gamma=gamma_min) + + # first get a pairwise distance matrix + if 
dist_type == "inner": + dist = step_features @ frame_features.T + else: + dist = cosine_sim(step_features, frame_features) + if step_normalize: + if labels is not None: + norm_dist = unique_softmax(dist, labels, gamma_xz) + else: + norm_dist = torch.softmax(dist / gamma_xz, 0) + dist = -log(norm_dist) + + # initialize soft-DTW table + nrows, ncols = dist.shape + # sdtw = torch.zeros((nrows+1,ncols+1)).to(torch.float).to(device) + sdtw = VarTable((nrows + 1, ncols + 1)) + for i in range(1, nrows + 1): + sdtw[i, 0] = 9999999999 + for j in range(1, ncols + 1): + sdtw[0, j] = 9999999999 + + # obtain dtw table using min_gamma or softMin relaxation + for i in range(1, nrows + 1): + for j in range(1, ncols + 1): + neighbors = torch.stack([sdtw[i, j - 1], sdtw[i - 1, j - 1], sdtw[i - 1, j]]) + di, dj = i - 1, j - 1 # in the distance matrix indices are shifted by one + new_val = dist[di, dj] + min_fn(neighbors) + sdtw[i, j] = torch.squeeze(new_val, 0) + sdtw_loss = sdtw[nrows, ncols] / step_features.shape[0] + return sdtw_loss, sdtw, dist + + +def dropDTW(zx_costs, drop_costs, softning="prob", exclusive=True, contiguous=True, gamma_min=1): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + K, N = zx_costs.shape + exclusive = exclusive if K <= N else False + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3)) # This corresponds to B 3-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + cum_drop_costs[xi - 1] + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e. 
dropping xi + Dm = d_left + drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_cost = D[K, N][0] + return min_cost, D + + +def batch_dropDTW( + zx_costs_list, drop_costs_list, softning="prob", exclusive=True, contiguous=True, drop_mode="DropDTW", gamma_min=1 +): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + + # pre-processing + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # preparing padded tables + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], [] + for i in range(B): + zx_costs = zx_costs_list[i] + drop_costs = drop_costs_list[i] + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # padding everything to the size of the largest N and K + row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device) + padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad])) + padded_drop_costs.append(torch.cat([drop_costs, row_pad])) + multirow_pad = torch.stack([row_pad + inf] * Ks[i], dim=0) + padded_table = torch.cat([zx_costs, multirow_pad], dim=1) + rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + inf + padded_table = torch.cat([padded_table, rest_pad], dim=0) + padded_zx_costs.append(padded_table) + + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3, B)) # This corresponds to B 3-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + if drop_mode == "DropDTW": + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + elif drop_mode == "OTAM": + D[0, xi] = torch.zeros_like(D[0, xi]) + else: # drop_mode == 'DTW' + D[0, xi] = torch.zeros_like(D[0, xi]) + inf + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + if drop_mode == "DropDTW": + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e. 
dropping xi + Dm = d_left + all_drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + else: + d_right = D[zi - 1, xi][0:1] + D_final = Dm = Dp = ( + min_fn([d_diag, d_left, d_right], gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + ) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + Ni, Ki = Ns[i], Ks[i] + min_cost_i = D[Ki, Ni][0, i] + min_costs.append(min_cost_i / Ni) + + return min_costs, D + + +def batch_double_dropDTW(zx_costs_list, drop_costs_list, gamma_min=1): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + min_fn = lambda x: minProb(x, gamma=gamma_min) + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + + # assuming sequences are the same length + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 4, B), dtype, dev) # This corresponds to B 4-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + all_cum_drop_costs[(zi - 1) : zi] + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1, 2, 3] # zx, z-, -x, -- + diag_neigh_costs = [D[zi - 1, xi - 1][s] for s in diag_neigh_states] + + left_neigh_states = [0, 1] # zx and z- + left_neigh_costs = [D[zi, xi - 1][s] for s in left_neigh_states] + + upper_neigh_states = [0, 2] # zx and -x + upper_neigh_costs = [D[zi - 1, xi][s] for s in upper_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + # DP 0: coming to zx + neigh_costs_zx = diag_neigh_costs + upper_neigh_costs + left_neigh_costs + D0 = min_fn(neigh_costs_zx) + all_zx_costs[z_cost_ind, x_cost_ind] + + # DP 1: coming to z- + neigh_costs_z_ = left_neigh_costs + D1 = min_fn(neigh_costs_z_) + all_drop_costs[x_cost_ind] + + # DP 2: coming to -x + neigh_costs__x = upper_neigh_costs + D2 = min_fn(neigh_costs__x) + all_drop_costs[z_cost_ind] + + # DP 3: coming to -- + costs___ = [d + all_drop_costs[z_cost_ind] * 2 for d in diag_neigh_costs] + [ + D[zi, xi - 1][3] + all_drop_costs[x_cost_ind], + D[zi - 1, xi][3] + all_drop_costs[z_cost_ind], + ] + D3 = min_fn(costs___) + + D[zi, xi] = torch.cat([D0, D1, D2, D3], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + min_cost_i = min_fn(D[K, N][:, i]) + min_costs.append(min_cost_i / N) + return min_costs, D + + +def drop_dtw_machine(zx_costs, drop_costs, gamma_min=1, exclusive=True, contiguous=True): + K, N = zx_costs.shape + dev = zx_costs.device + flipped_costs = torch.flip(zx_costs, [0]) # flip the cost matrix upside down + cum_drop_costs = torch.cumsum(drop_costs, dim=-1) + + # initialize first two contr diagonals + inf = torch.tensor([9999999999], device=dev, dtype=zx_costs.dtype) + diag_pp = torch.zeros([1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([1, 2], 
device=dev) * inf + diag_p_row = torch.stack([inf, cum_drop_costs[[0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 0) # diag at i-1 + + for i in range(K + N - 1): + size = diag_p.size(0) - 1 + pp_start = max(0, diag_pp.size(0) - diag_p.size(0)) + neigh_up, neigh_left, neigh_diag = diag_p[:-1], diag_p[1:], diag_pp[pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[:, [0]], neigh_left[:, [0]] + + # define match and drop cost vectors + match_costs_diag = torch.flip(torch.diag(flipped_costs, i + 1 - K), [-1]) + d_start, d_end = max(1 - K + i, 0), min(i, N - 1) + 1 + drop_costs_diag = torch.flip(drop_costs[d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + if not exclusive: + pos_neighbors.append(neigh_up_pos) + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if i < N - 1: + # fill in 0th row with [drop_cost, inf] + pad = torch.stack([inf, cum_drop_costs[[i + 1]]], -1) + diag = torch.cat([pad, diag]) + if i < K - 1: + # fill in 0th col with [inf, inf] + pad = torch.stack([inf, inf], -1) + diag = torch.cat([diag, pad]) + + diag_pp = diag_p + diag_p = diag + assert (diag.size(0) == 1) and (diag.size(1) == 2), f"Last diag shape is {diag.shape} instead of [1, 2]" + + cost = prob_min(diag, gamma_min) + return cost + + +def batch_drop_dtw_machine(zx_costs_list, drop_costs_list, gamma_min=1, exclusive=True, contiguous=True): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + # For samples where K > N, exclusive computation is not possible + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + persample_exclusive = torch.tensor([Ni >= Ki for Ki, Ni in shapes]).to(dev) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # define costs in tensors + all_zx_costs = [F.pad(c, [0, N - c.shape[1], 0, K - c.shape[0]]) for c in zx_costs_list] + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0) + all_cum_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. Here, 0 is keep, 1 is drop. 
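+    Iterating over anti-diagonals lets the whole batch advance in lock-step:
+    the DP table is filled in K + N - 1 wavefront steps, and every cell on a
+    diagonal only needs the two previous diagonals (diag_p and diag_pp).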
+ """ + # initialize first two contr diagonals + batch_inf, batch_ones = torch.stack([inf] * B, 0), torch.ones([B, 1], device=dev, dtype=dtype) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([B, 1, 2], device=dev) * batch_inf[..., None] + diag_p_row = torch.stack([batch_inf, all_cum_drop_costs[:, [0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The pathlength path is also a diagonal representation that carries the optimal pathlength to each point + with torch.no_grad(): + path_pp = torch.zeros([B, 1, 2], device=dev, dtype=dtype) + path_p = torch.ones([B, 2, 2], device=dev, dtype=dtype) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) + path_lens = torch.zeros(B).to(dtype=dtype).to(device=dev) + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + neigh_path_up, neigh_path_left, neigh_path_diag = ( + path_p[:, :-1], + path_p[:, 1:], + path_pp[:, pp_start : (pp_start + size)], + ) + neigh_path_up_pos, neigh_path_left_pos = neigh_path_up[..., [0]], neigh_path_left[..., [0]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + d_start, d_end = max(1 - K + d, 0), min(d, N - 1) + 1 + drop_costs_diag = torch.flip(all_drop_costs[:, d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + pos_path_neighbors = ( + [neigh_path_diag, neigh_path_left_pos] if contiguous else [neigh_path_diag, neigh_path_left] + ) + if exclusive and (~persample_exclusive).any(): + # apply non-exclusive rule for some batch elements, via masing out the exclusive elements with inf + masked_neigh_up_pos = neigh_up_pos + persample_exclusive[:, None, None] * batch_inf[:, None] + pos_neighbors.append(masked_neigh_up_pos) + + pos_path_neighbors.append(neigh_path_up_pos * (~persample_exclusive[:, None, None])) + elif not exclusive: + # apply standard non-exclusive rule to all batch elements + pos_neighbors.append(neigh_up_pos) + pos_path_neighbors.append(neigh_path_up_pos) + + # DP Table update + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # Path Table Update + with torch.no_grad(): + path_pos = prob_min(pos_path_neighbors, gamma_min, pos_neighbors) + 1 + path_neg = prob_min([neigh_path_left], gamma_min, [neigh_left]) + 1 + path = torch.stack([path_pos, path_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in DP table's 0th row with [drop_cost, inf] + pad_d = torch.stack([batch_inf, all_cum_drop_costs[:, [d + 1]]], -1) + diag = torch.cat([pad_d, diag], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = torch.stack([batch_inf, torch.zeros_like(batch_inf) + d], -1) + path = torch.cat([pad_p, path], 1) + + if d < K - 1: + # fill in DP table's 0th col with [inf, inf] + pad_d = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad_d], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = pad_d + path = torch.cat([path, pad_p], 1) + + diag_pp = diag_p + diag_p 
= diag + + path_pp = path_p + path_p = path + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + path_lens[orig_mask] = path_lens[orig_mask] + prob_min([path[bs, rs]], gamma_min, [diag[bs, rs]]) + + diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs] + ] + all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive = [ + t[~mask] for t in [all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive] + ] + if torch.numel(Ds) == 0: + break + + # costs = prob_min([diag], gamma_min) + costs_norm = min_costs / path_lens + return min_costs, path_lens + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, gamma_min=1, exclusive=True, contiguous=True +): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + Ns, Ks = [], [] + for i in range(B): + Ki, Ni = zx_costs_list[i].shape + if exclusive and Ki >= Ni: + # in case the number of steps is greater than the number of frames, + # duplicate every frame and let the drops do the job. + mult = math.ceil(Ki / Ni) + zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1]) + x_drop_costs_list[i] = torch.stack([x_drop_costs_list[i]] * mult, dim=-1).reshape([-1]) + Ni *= mult + Ns.append(Ni) + Ks.append(Ki) + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag, neigh_left_pos[..., [0]]] if contiguous else [neigh_diag, neigh_left_pos] + if not exclusive: + neighbors_zx.append(neigh_up_pos) + diag_zx = prob_min(neighbors_zx, gamma_min) + match_costs_diag + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + diag_z_ = prob_min(neighbors_z_, gamma_min) + x_drop_costs_diag + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + diag__x = prob_min(neighbors__x, gamma_min) + z_drop_costs_diag + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], neigh_up_neg + z_drop_costs_diag[..., None]] + diag___ = prob_min(neighbors___, gamma_min) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in 0th row with [drop_cost, inf] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([pad, diag], 1) + if d < K - 1: + # fill in 0th col with [inf, inf] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], 1) + + diag_pp = diag_p + diag_p = diag + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + + # filtering out already processed elements + diag, diag_p, diag_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs = [ + t[~mask] for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs] + ] + + if torch.numel(Ds) == 0: + break + + costs_norm = min_costs / torch.tensor(Ns).to(dev) + return costs_norm + + 
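+# A minimal usage sketch for the soft machines above (illustrative only): it
+# builds Drop-DTW costs from a step-by-frame similarity matrix the same way
+# DTWMatcher in pdvc/matcher.py does, i.e. similarities are shifted by a
+# percentile "dropline" so the drop costs can be set to zero. The helper name
+# `make_drop_dtw_costs` and the 0.3 percentile are assumptions for this demo,
+# not part of the original API.
+def make_drop_dtw_costs(sim, keep_percentile=0.3):
+    """sim: [K, N] step-to-frame similarities; returns (zx_costs, drop_costs)."""
+    k = max(1, int(torch.numel(sim) * keep_percentile))
+    dropline = torch.topk(sim.reshape([-1]), k).values[-1]
+    zx_costs = dropline - sim  # cheap to match where similarity is high
+    drop_costs = torch.zeros(sim.size(1))  # frame drops are free after the shift
+    return zx_costs, drop_costs
+
+
+# e.g. with a random 5-step, 12-frame problem:
+#   sim = torch.normal(torch.zeros(5, 12))
+#   zx, drops = make_drop_dtw_costs(sim)
+#   cost = drop_dtw_machine(zx, drops + 1e-3, gamma_min=0.1)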
+if __name__ == "__main__": + from exact_dp import double_drop_dtw + + K, N = 7, 15 + zx_costs = torch.normal(torch.ones([K, N])) + x_drop_costs = zx_costs.mean(0) + z_drop_costs = zx_costs.mean(1) + + min_cost, *_ = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy()) + my_costs = batch_double_drop_dtw_machine([zx_costs], [x_drop_costs], [z_drop_costs], gamma_min=0) + print(my_costs * N, min_cost) diff --git a/anet_clip/backup/pdvc/dp/visualization.py b/anet_clip/backup/pdvc/dp/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..fed631a8979532253408fb402672eee0cc4a9a64 --- /dev/null +++ b/anet_clip/backup/pdvc/dp/visualization.py @@ -0,0 +1,179 @@ +import io +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.pyplot import figure +from PIL import Image + + +# defining the colors and shapes +color_code = [ + "blue", + "orange", + "green", + "red", + "purple", + "brown", + "pink", + "grey", + "olive", + "cyan", + "lime", + "grey", + "firebrick", + "coral", + "chocolate", + "saddlebrown", + "bisque", + "goldenrod", + "gold", + "khaki", + "darkolivegreen", + "greenyellow", + "palegreen", + "springgreen", + "aquamarine", + "teal", + "deepskyblue", + "navy", + "mediumslateblue", + "royalblue", + "indigo", + "magenta", + "deeppink", + "crimson", + "violet", + "snow", + "lightgrey", + "wheat", + "dodgerblue", + "darkseagreen", +] +color_code = color_code * 10 +shape_code = ["o", "s", "P", "*", "h", ">", "X", "d", "D", "v", "<", "p"] +shape_code = shape_code * int(len(color_code) / len(shape_code) + 1) + +color_values = [] +for color in color_code: + _ = plt.fill([0, 0, 1, 1, 0], [0, 1, 1, 0, 0], color) + buf = io.BytesIO() + _ = plt.savefig(buf, format="png") + _ = plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + color_values.append(img[100, 300]) + +color_code_hex = [] +for color_value in color_values: + step_color_rgb = tuple([s.item() for s in color_value]) + color_code_hex.append("#%02x%02x%02x" % step_color_rgb) + + +def plot_alignment( + step_ids, frame_labels, step_colors, step_shapes, size=(15, 2), name="all_step_to_video", to_np=True, grid_on=True +): + N_steps = len(frame_labels) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + tick_freq = 50 if N_steps > 1500 else 20 + _ = plt.xticks(np.arange(0, N_steps, tick_freq)) + _ = plt.xlim(0, N_steps) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + + if grid_on: + _ = plt.grid() + else: + plt.plot(np.arange(len(frame_labels)), [1] * len(frame_labels), color="grey") + + for si, step_id in enumerate(step_ids): + time, val = [], [] + for i in range(N_steps): + if si + 1 == frame_labels[i]: + time.append(i) + val.append(1) + time, val = np.array(time), np.array(val) + _ = plt.plot(time, val, step_shapes[step_id], color=step_colors[step_id]) + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_step_to_video_alignment(corresp_mat, size=(15, 2)): + """corresp_mat is of shape [K, N], where K is num_steps, and N is video_len""" + step_ids = np.arange(corresp_mat.size(0)) + 1 + labels = corresp_mat.to(float).argmax(0) + 1 * corresp_mat.to(bool).any(0) + + K_present = corresp_mat.to(bool).any(1).to(int).sum().item() + name = f"Video Segmentation | {K_present} steps present" + return plot_alignment(step_ids, 
labels, color_code, shape_code, name=name, size=size) + + +def plot_similarities( + sim, + drop_line=None, + colors=None, + select=None, + color_offset=0, + do_legend=True, + name="", + size=(15, 2), + grid_on=True, + to_np=True, + linewidth=1, +): + colors = colors if colors is not None else color_code + K, N = sim.shape + select = select if select is not None else np.arange(K) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + _ = plt.xticks(np.arange(0, N, 20)) + _ = plt.xlim(0, N) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + if grid_on: + _ = plt.grid() + + for i in range(K): + if i in select: + _ = plt.plot(np.arange(N), sim[i], color=colors[i + color_offset], label=str(i), linewidth=linewidth) + + if drop_line is not None: + _ = plt.plot(np.arange(N), drop_line * np.ones(N), "--") + + if do_legend: + _ = plt.xlim(0, N + int(0.10 * N)) + plt.legend() + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_gt_seg(N, starts, ends, colors=None, shapes=None, name="GT Seg", clip_len=1, size=(15, 2), grid_on=True): + colors = colors if colors is not None else color_code + shapes = shapes if shapes is not None else shape_code + + K = len(starts) + labels = -np.ones(N) + for i in range(K): + s, e = int(starts[i]), int(ends[i]) + labels[s : e + 1] = i + step_ids = np.arange(K) + return plot_alignment(step_ids, labels, colors, shapes, to_np=False, name=name, size=size, grid_on=grid_on) diff --git a/anet_clip/backup/pdvc/matcher.py b/anet_clip/backup/pdvc/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..3311680756df6cf1efeed2bbe2ab55350525b4ce --- /dev/null +++ b/anet_clip/backup/pdvc/matcher.py @@ -0,0 +1,446 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +from scipy.optimize import linear_sum_assignment +from torch import nn +import torch.nn.functional as F +from torch import log, exp +import numpy as np + +from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from pdvc.dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from pdvc.dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from pdvc.dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class HungarianMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + + For efficiency reasons, the targets don't include the no_object. 
Because of this, in general, + there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, + while the others are un-matched (and thus treated as non-objects). + """ + + def __init__(self, + cost_class: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0 # or cost_caption!=0, "all costs cant be 0" + # breakpoint() + + def forward(self, outputs, targets, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + # Also concat the target labels and boxes + tgt_ids = torch.cat([v["labels"] for v in targets]) + if self.use_pseudo_box and self.training: + # print('use pseudo box') + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + else: + tgt_bbox = torch.cat([v["boxes"] for v in targets]) + # print('use gt box') + + # Compute the classification cost. 
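+            # This is the focal-loss form of the cost (as in Deformable DETR):
+            # for each (query, target) pair we take the focal positive term at
+            # the target class minus the focal negative term, so queries that
+            # are confidently correct receive a low (negative) matching cost.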
+ alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + + # Compute the giou cost between boxes + try: + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + except Exception: + print('out_bbox', out_bbox) + print('tgt_bbox', tgt_bbox) + raise + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix; each term has shape [num_queries, num_targets] + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2]} + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + sizes = [len(v["boxes_pseudo"]) for v in targets] if self.use_pseudo_box else [len(v["boxes"]) for v in targets] + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + # Relaxed matching: tile each target m2o_rate times so that up to m2o_rate queries + # can be assigned to the same target, then fold the column indices back with j % size. + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]] * m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j % sizes[ii], dtype=torch.int64)) for ii, (i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + if verbose: + print('------matching results:') + print(indices) + for indice in indices: + for i, j in zip(*indice): + print(out_bbox[i][0::2], tgt_bbox[j][0::2]) + print('-----topK scores:') + topk_indices = out_prob.topk(10, dim=0) + print(topk_indices) + for i, (v, ids) in enumerate(zip(*topk_indices)): + print('top {}'.format(i)) + s = '' + for name, cost in costs.items(): + s += name + ':{} '.format(cost[ids]) + print(s) + + return indices, rl_indices + +class DTWMatcher(nn.Module): + ''' + drop_z: if True, drop along both the x axis (queries) and the z axis (text) + one_to_many: multiple x may match to one z + many_to_one: multiple z may match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=True, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, outputs, targets, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed[0].device + # embarrassingly, this is faster on CPU than on GPU!
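+ # Rough picture of the cost construction below: pairwise text/query similarities are + # shifted by a per-video "dropline" so that well-matched pairs get negative cost while + # drop costs stay at 0; with keep_percentile = p, the dropline is the ceil(p * numel)-th + # largest similarity, so roughly a fraction p of all pairs can be matched at negative cost.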
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + # TODO: Add the classification cost the the alignment cost + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, [] + +class SimMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + based on the similarity bewteen text embedding and query embedding + """ + def __init__(self, + cost_class: float = 1, + cost_sim: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_sim = cost_sim + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + 
self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_sim!=0, "all costs cannot be 0" + # breakpoint() + + def forward(self, outputs, targets, text_embed, event_embed, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + tgt_ids = torch.cat([v["labels"] for v in targets]) + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Also concat the target labels and boxes + # breakpoint() + if self.use_pseudo_box: + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + + # Compute the giou cost betwen boxes + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + else: + cost_bbox = torch.zeros_like(cost_class) + cost_giou = torch.zeros_like(cost_class) + + # Compute the classification cost. 
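+ # Note: this recomputes the same focal-style class cost as above; the distinctive + # term in SimMatcher is cost_sim = 1 - cosine(text_embed, event_embed), computed + # below, which makes queries whose embedding aligns with a caption cheap to match.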
+ # alpha = 0.25 + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + # breakpoint() + # Compute the similarity cost + cost_sim = compute_sim(text_embed, event_embed, l2_norm=True).permute(1,0) + cost_sim = torch.ones_like(cost_sim) - cost_sim + # breakpoint() + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + self.cost_sim * cost_sim + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + 'cost_sim': cost_sim, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2], + } + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + print(self.cost_sim, cost_sim.var(dim=0), cost_sim.max(dim=0)[0] - cost_sim.min(dim=0)[0]) + # print(self.cost_caption, cost_caption.var(dim=0), cost_caption.max(dim=0)[0] - cost_caption.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + sizes = [text_embed.size(0)] + # pdb.set_trace() + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]]*m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j%sizes[ii], dtype=torch.int64)) for ii,(i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + + return indices, rl_indices + +def build_matcher(args): + if args.matcher_type == 'DTW': + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + elif args.matcher_type == 'Sim': + return SimMatcher(cost_class=args.set_cost_class, + cost_sim=args.set_cost_sim, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + else: + return HungarianMatcher(cost_class=args.set_cost_class, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + + +def build_matcher_simple(): + #return DTWMatcher(keep_percentile=0.5) + return SimMatcher() + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() \ No newline at end of file diff --git a/anet_clip/backup/pdvc/matcher_align.py b/anet_clip/backup/pdvc/matcher_align.py new file mode 100644 index 0000000000000000000000000000000000000000..e9b93dce7e9ff252230fbb8f8bc2861ce3a16605 --- /dev/null +++ b/anet_clip/backup/pdvc/matcher_align.py @@ -0,0 +1,154 @@ +# 
------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +import torch.nn.functional as F +import numpy as np +from torch import nn +from scipy.optimize import linear_sum_assignment +# from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class DTWMatcher(nn.Module): + ''' + drop_z: if True, drop along both the x axis (queries) and the z axis (text) + one_to_many: multiple x may match to one z + many_to_one: multiple z may match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=False, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed.device + # embarrassingly, this is faster on CPU than on GPU!
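+ # The drop-DTW / Needleman-Wunsch machines invoked below are sequential dynamic + # programs, which is presumably why CPU beats GPU here; only the correspondence + # matrices are moved back to orig_device afterwards.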
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, _ + +def build_matcher(args): + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + + +def build_matcher_simple(): + return DTWMatcher(keep_percentile=0.5) + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() diff --git a/anet_clip/backup/pdvc/modules/UniVL_mini.py b/anet_clip/backup/pdvc/modules/UniVL_mini.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9d6e960cc742b2eed92827f568734ae91073ce --- /dev/null +++ 
b/anet_clip/backup/pdvc/modules/UniVL_mini.py @@ -0,0 +1,1292 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import copy +import math +import logging +import collections +import unicodedata +import os +from urllib.parse import urlparse +from typing import Optional, Tuple, Union, IO, Callable, Set +from pathlib import Path +import shutil +import tempfile +import json +from hashlib import sha256 +from functools import wraps +import boto3 +from botocore.exceptions import ClientError +import requests +from tqdm import tqdm + + +import torch +from torch import nn + + +logger = logging.getLogger(__name__) + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz", +} + +CONFIG_NAME = 'bert_config.json' +WEIGHTS_NAME = 'pytorch_model.bin' + +PRETRAINED_VOCAB_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", +} +PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = { + 'base-uncased': 512, + 'large-uncased': 512, + 'base-cased': 512, + 'large-cased': 512, + 'base-multilingual-uncased': 512, + 'base-multilingual-cased': 512, + 'base-chinese': 512, +} +VOCAB_NAME = 'vocab.txt' + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with 
open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. + """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. 
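+ # The temp file lives outside the cache directory, so an interrupted download never + # leaves a partial entry in the cache; the flush() and seek(0) below matter because + # shutil.copyfileobj copies from the current file position and the tail of the + # download may still sit in an in-memory buffer.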
+ with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a peice of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")): + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + if token not in self.vocab: + ids.append(self.vocab["[UNK]"]) + logger.error("Cannot find token '{}' in vocab. 
Using [UNK] insetad".format(token)) + else: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + raise ValueError( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids in tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + @classmethod + def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedBertModel from a pre-trained model file. + Download and cache the pre-trained model file if needed. + """ + vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(vocab_file) is False: + if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP: + vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name] + else: + vocab_file = pretrained_model_name + if os.path.isdir(vocab_file): + vocab_file = os.path.join(vocab_file, VOCAB_NAME) + # redirect to the cache, if necessary + print(vocab_file) + try: + resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir) + except FileNotFoundError: + logger.error( + "Model name '{}' was not found. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + vocab_file)) + return None + if resolved_vocab_file == vocab_file: + logger.info("loading vocabulary file {}".format(vocab_file)) + else: + logger.info("loading vocabulary file {} from cache at {}".format( + vocab_file, resolved_vocab_file)) + if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP: + # if we're using a pretrained model, ensure the tokenizer wont index sequences longer + # than the number of positional embeddings + max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name] + kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len) + kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]") + + # Instantiate tokenizer. + tokenizer = cls(resolved_vocab_file, *inputs, **kwargs) + + return tokenizer + + def add_tokens(self, new_tokens, model): + """ + Add a list of new tokens to the tokenizer class. If the new tokens are not in the + vocabulary, they are added to it with indices starting from length of the current vocabulary. + Args: + new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assign the index of the ``unk_token`` to them). + Returns: + Number of tokens added to the vocabulary. + Examples:: + # Let's see how to increase the vocabulary of Bert model and tokenizer + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) + print('We have added', num_added_toks, 'tokens') + model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. 
+ """ + + to_add_tokens = [] + for token in new_tokens: + assert isinstance(token, str) + to_add_tokens.append(token) + # logger.info("Adding %s to the vocabulary", token) + + vocab = collections.OrderedDict() + for token in self.vocab.keys(): + vocab[token] = self.vocab[token] + for token in to_add_tokens: + vocab[token] = len(vocab) + self.vocab = self.wordpiece_tokenizer.vocab = vocab + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + + model.resize_token_embeddings(new_num_tokens=len(vocab)) + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. + """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False + + +def gelu(x): + """Implementation of the gelu activation function. 
+ For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weight doesn't exsits. 
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + +class BertConfig(PretrainedConfig): + """Configuration class to store the configuration of a `BertModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. 
+ """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. + """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + # if not isinstance(config, PretrainedConfig): + # raise ValueError( + # "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + # "To create a model from a Google pretrained model use " + # "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + # self.__class__.__name__, self.__class__.__name__ + # )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in 
module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). + """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated to the first character of the + input (`CLF`) to train on the Next-Sentence task (see BERT's paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + def __init__(self, config): + super(BertModel, self).__init__(config) + self.config = config + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output + + +def build_UniVL_text_encoder(dict): + bert_config = BertConfig.from_dict(dict) + bert = BertModel(bert_config) + + return bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + + + +def load_pretrained_UniVL(args, device, n_gpu, local_rank, init_model=None): + + if init_model: + model_state_dict = torch.load(init_model, map_location='cpu') + else: + model_state_dict = None + + # Prepare model + cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + + return model + +if __name__ == '__main__': + bert_config_dict = { + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 + } + tokenizer = build_UniVL_tokenizer() + bert = build_UniVL_text_encoder(bert_config_dict) + words = ["[CLS]"] + ['you', 'love', 'you'] + ["[SEP]"] + #input_ids = tokenizer.convert_tokens_to_ids(words) + #masked_tokens = words.copy() + #masked_token_ids = tokenizer.convert_tokens_to_ids(masked_tokens) + token_type_ids = None + breakpoint() + encoded_layers, _ = bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True) + sequence_output = encoded_layers[-1] + diff --git a/anet_clip/backup/pdvc/modules/__init__.py b/anet_clip/backup/pdvc/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d34e0a02cf990fffc878b695beee9637074e33d0 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..874dd9210e523da3f66f8b15054a96cadeee908f Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2030617dd39a30551bcda930768bb5af198af31 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc differ diff --git 
a/anet_clip/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3218f01ae734e108885fd322ff9db4dc73b204fe Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0585085b54395651e7fa6b8fb60d877f579733ce Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f39ca45e9cc3f91242f4e039001dc5f6f2636af Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93a08af0acd6f720e525203d381fe91f1bc3b33f Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6631deadc8c18f93e755eca7dd975b6ce83b6ca1 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5148122f4202468a675012274b2eead3a84a1510 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff40d3cabb4bad221fc02eb703a9b76971c01709 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64204d60a2c8da0639a86e05802791c7a65e4c17 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/beam.py b/anet_clip/backup/pdvc/modules/beam.py new file mode 100644 index 0000000000000000000000000000000000000000..eff1d961ef393e03a3c9105022b1047f5ea7133d --- /dev/null +++ b/anet_clip/backup/pdvc/modules/beam.py @@ -0,0 +1,116 @@ +""" +Manage beam search info structure. +Heavily borrowed from OpenNMT-py. 
+For code in OpenNMT-py, please check the following link (maybe in oldest version): +https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py +""" + +import torch + +class Constants(): + def __init__(self): + self.PAD = 0 + self.UNK = 1 + self.BOS = 2 + self.EOS = 3 + self.PAD_WORD = '[PAD]' + self.UNK_WORD = '[UNK]' + self.BOS_WORD = '[CLS]' + self.EOS_WORD = '[SEP]' + + @classmethod + def from_tokenizer(cls, tokenizer): + instance = cls() + instance.PAD = tokenizer.vocab[instance.PAD_WORD] + instance.UNK = tokenizer.vocab[instance.UNK_WORD] + instance.BOS = tokenizer.vocab[instance.BOS_WORD] + instance.EOS = tokenizer.vocab[instance.EOS_WORD] + return instance + +class Beam(): + ''' Beam search ''' + + def __init__(self, size, device=False, tokenizer=None): + if tokenizer is None: + self.constants = Constants() + else: + self.constants = Constants.from_tokenizer(tokenizer) + + self.size = size + self._done = False + # The score for each interface on the beam. + self.scores = torch.zeros((size,), dtype=torch.float, device=device) + self.all_scores = [] + + # The backpointers at each time-step. + self.prev_ks = [] + + # The outputs at each time-step. + self.next_ys = [torch.full((size,), self.constants.BOS, dtype=torch.long, device=device)] + + def get_current_state(self): + "Get the outputs for the current timestep." + return self.get_tentative_hypothesis() + + def get_current_origin(self): + "Get the backpointers for the current timestep." + return self.prev_ks[-1] + + @property + def done(self): + return self._done + + def advance(self, word_prob, word_length=None): + + "Update beam status and check if finished or not." + num_words = word_prob.size(1) + # Sum the previous scores. + if len(self.prev_ks) > 0: + beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) + else: + beam_lk = word_prob[0] + flat_beam_lk = beam_lk.view(-1) + best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) # 1st sort + self.all_scores.append(self.scores) + self.scores = best_scores + # bestScoresId is flattened as a (beam x word) array, + # so we need to calculate which word and beam each score came from + prev_k = best_scores_id // num_words + self.prev_ks.append(prev_k) + self.next_ys.append(best_scores_id - prev_k * num_words) + # End condition is when top-of-beam is EOS. + if self.next_ys[-1][0].item() == self.constants.EOS: + self._done = True + + return self._done + + def sort_scores(self): + "Sort the scores." + return torch.sort(self.scores, 0, True) + + def get_the_best_score_and_idx(self): + "Get the score of the best in the beam." + scores, ids = self.sort_scores() + return scores[1], ids[1] + + def get_tentative_hypothesis(self): + "Get the decoded sequence for the current timestep." + + if len(self.next_ys) == 1: + dec_seq = self.next_ys[0].unsqueeze(1) + else: + _, keys = self.sort_scores() + hyps = [self.get_hypothesis(k) for k in keys] + hyps = [[self.constants.BOS] + h for h in hyps] + dec_seq = torch.LongTensor(hyps) + + return dec_seq + + def get_hypothesis(self, k): + """ Walk back to construct the full hypothesis. 
""" + hyp = [] + for j in range(len(self.prev_ks) - 1, -1, -1): + hyp.append(self.next_ys[j+1][k]) + k = self.prev_ks[j][k] + + return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/anet_clip/backup/pdvc/modules/bert-base-uncased/bert_config.json b/anet_clip/backup/pdvc/modules/bert-base-uncased/bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fca794a5f07ff8f963fe8b61e3694b0fb7f955df --- /dev/null +++ b/anet_clip/backup/pdvc/modules/bert-base-uncased/bert_config.json @@ -0,0 +1,13 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 +} diff --git a/anet_clip/backup/pdvc/modules/bert-base-uncased/vocab.txt b/anet_clip/backup/pdvc/modules/bert-base-uncased/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb140275c155a9c7c5a3b3e0e77a9e839594a938 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/bert-base-uncased/vocab.txt @@ -0,0 +1,30522 @@ +[PAD] +[unused0] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[unused11] +[unused12] +[unused13] +[unused14] +[unused15] +[unused16] +[unused17] +[unused18] +[unused19] +[unused20] +[unused21] +[unused22] +[unused23] +[unused24] +[unused25] +[unused26] +[unused27] +[unused28] +[unused29] +[unused30] +[unused31] +[unused32] +[unused33] +[unused34] +[unused35] +[unused36] +[unused37] +[unused38] +[unused39] +[unused40] +[unused41] +[unused42] +[unused43] +[unused44] +[unused45] +[unused46] +[unused47] +[unused48] +[unused49] +[unused50] +[unused51] +[unused52] +[unused53] +[unused54] +[unused55] +[unused56] +[unused57] +[unused58] +[unused59] +[unused60] +[unused61] +[unused62] +[unused63] +[unused64] +[unused65] +[unused66] +[unused67] +[unused68] +[unused69] +[unused70] +[unused71] +[unused72] +[unused73] +[unused74] +[unused75] +[unused76] +[unused77] +[unused78] +[unused79] +[unused80] +[unused81] +[unused82] +[unused83] +[unused84] +[unused85] +[unused86] +[unused87] +[unused88] +[unused89] +[unused90] +[unused91] +[unused92] +[unused93] +[unused94] +[unused95] +[unused96] +[unused97] +[unused98] +[UNK] +[CLS] +[SEP] +[MASK] +[unused99] +[unused100] +[unused101] +[unused102] +[unused103] +[unused104] +[unused105] +[unused106] +[unused107] +[unused108] +[unused109] +[unused110] +[unused111] +[unused112] +[unused113] +[unused114] +[unused115] +[unused116] +[unused117] +[unused118] +[unused119] +[unused120] +[unused121] +[unused122] +[unused123] +[unused124] +[unused125] +[unused126] +[unused127] +[unused128] +[unused129] +[unused130] +[unused131] +[unused132] +[unused133] +[unused134] +[unused135] +[unused136] +[unused137] +[unused138] +[unused139] +[unused140] +[unused141] +[unused142] +[unused143] +[unused144] +[unused145] +[unused146] +[unused147] +[unused148] +[unused149] +[unused150] +[unused151] +[unused152] +[unused153] +[unused154] +[unused155] +[unused156] +[unused157] +[unused158] +[unused159] +[unused160] +[unused161] +[unused162] +[unused163] +[unused164] +[unused165] +[unused166] +[unused167] +[unused168] +[unused169] +[unused170] +[unused171] +[unused172] +[unused173] +[unused174] +[unused175] +[unused176] +[unused177] +[unused178] +[unused179] +[unused180] +[unused181] +[unused182] +[unused183] +[unused184] +[unused185] +[unused186] 
+[unused187] +[unused188] +[unused189] +[unused190] +[unused191] +[unused192] +[unused193] +[unused194] +[unused195] +[unused196] +[unused197] +[unused198] +[unused199] +[unused200] +[unused201] +[unused202] +[unused203] +[unused204] +[unused205] +[unused206] +[unused207] +[unused208] +[unused209] +[unused210] +[unused211] +[unused212] +[unused213] +[unused214] +[unused215] +[unused216] +[unused217] +[unused218] +[unused219] +[unused220] +[unused221] +[unused222] +[unused223] +[unused224] +[unused225] +[unused226] +[unused227] +[unused228] +[unused229] +[unused230] +[unused231] +[unused232] +[unused233] +[unused234] +[unused235] +[unused236] +[unused237] +[unused238] +[unused239] +[unused240] +[unused241] +[unused242] +[unused243] +[unused244] +[unused245] +[unused246] +[unused247] +[unused248] +[unused249] +[unused250] +[unused251] +[unused252] +[unused253] +[unused254] +[unused255] +[unused256] +[unused257] +[unused258] +[unused259] +[unused260] +[unused261] +[unused262] +[unused263] +[unused264] +[unused265] +[unused266] +[unused267] +[unused268] +[unused269] +[unused270] +[unused271] +[unused272] +[unused273] +[unused274] +[unused275] +[unused276] +[unused277] +[unused278] +[unused279] +[unused280] +[unused281] +[unused282] +[unused283] +[unused284] +[unused285] +[unused286] +[unused287] +[unused288] +[unused289] +[unused290] +[unused291] +[unused292] +[unused293] +[unused294] +[unused295] +[unused296] +[unused297] +[unused298] +[unused299] +[unused300] +[unused301] +[unused302] +[unused303] +[unused304] +[unused305] +[unused306] +[unused307] +[unused308] +[unused309] +[unused310] +[unused311] +[unused312] +[unused313] +[unused314] +[unused315] +[unused316] +[unused317] +[unused318] +[unused319] +[unused320] +[unused321] +[unused322] +[unused323] +[unused324] +[unused325] +[unused326] +[unused327] +[unused328] +[unused329] +[unused330] +[unused331] +[unused332] +[unused333] +[unused334] +[unused335] +[unused336] +[unused337] +[unused338] +[unused339] +[unused340] +[unused341] +[unused342] +[unused343] +[unused344] +[unused345] +[unused346] +[unused347] +[unused348] +[unused349] +[unused350] +[unused351] +[unused352] +[unused353] +[unused354] +[unused355] +[unused356] +[unused357] +[unused358] +[unused359] +[unused360] +[unused361] +[unused362] +[unused363] +[unused364] +[unused365] +[unused366] +[unused367] +[unused368] +[unused369] +[unused370] +[unused371] +[unused372] +[unused373] +[unused374] +[unused375] +[unused376] +[unused377] +[unused378] +[unused379] +[unused380] +[unused381] +[unused382] +[unused383] +[unused384] +[unused385] +[unused386] +[unused387] +[unused388] +[unused389] +[unused390] +[unused391] +[unused392] +[unused393] +[unused394] +[unused395] +[unused396] +[unused397] +[unused398] +[unused399] +[unused400] +[unused401] +[unused402] +[unused403] +[unused404] +[unused405] +[unused406] +[unused407] +[unused408] +[unused409] +[unused410] +[unused411] +[unused412] +[unused413] +[unused414] +[unused415] +[unused416] +[unused417] +[unused418] +[unused419] +[unused420] +[unused421] +[unused422] +[unused423] +[unused424] +[unused425] +[unused426] +[unused427] +[unused428] +[unused429] +[unused430] +[unused431] +[unused432] +[unused433] +[unused434] +[unused435] +[unused436] +[unused437] +[unused438] +[unused439] +[unused440] +[unused441] +[unused442] +[unused443] +[unused444] +[unused445] +[unused446] +[unused447] +[unused448] +[unused449] +[unused450] +[unused451] +[unused452] +[unused453] +[unused454] +[unused455] +[unused456] +[unused457] +[unused458] +[unused459] 
+[unused460] +[unused461] +[unused462] +[unused463] +[unused464] +[unused465] +[unused466] +[unused467] +[unused468] +[unused469] +[unused470] +[unused471] +[unused472] +[unused473] +[unused474] +[unused475] +[unused476] +[unused477] +[unused478] +[unused479] +[unused480] +[unused481] +[unused482] +[unused483] +[unused484] +[unused485] +[unused486] +[unused487] +[unused488] +[unused489] +[unused490] +[unused491] +[unused492] +[unused493] +[unused494] +[unused495] +[unused496] +[unused497] +[unused498] +[unused499] +[unused500] +[unused501] +[unused502] +[unused503] +[unused504] +[unused505] +[unused506] +[unused507] +[unused508] +[unused509] +[unused510] +[unused511] +[unused512] +[unused513] +[unused514] +[unused515] +[unused516] +[unused517] +[unused518] +[unused519] +[unused520] +[unused521] +[unused522] +[unused523] +[unused524] +[unused525] +[unused526] +[unused527] +[unused528] +[unused529] +[unused530] +[unused531] +[unused532] +[unused533] +[unused534] +[unused535] +[unused536] +[unused537] +[unused538] +[unused539] +[unused540] +[unused541] +[unused542] +[unused543] +[unused544] +[unused545] +[unused546] +[unused547] +[unused548] +[unused549] +[unused550] +[unused551] +[unused552] +[unused553] +[unused554] +[unused555] +[unused556] +[unused557] +[unused558] +[unused559] +[unused560] +[unused561] +[unused562] +[unused563] +[unused564] +[unused565] +[unused566] +[unused567] +[unused568] +[unused569] +[unused570] +[unused571] +[unused572] +[unused573] +[unused574] +[unused575] +[unused576] +[unused577] +[unused578] +[unused579] +[unused580] +[unused581] +[unused582] +[unused583] +[unused584] +[unused585] +[unused586] +[unused587] +[unused588] +[unused589] +[unused590] +[unused591] +[unused592] +[unused593] +[unused594] +[unused595] +[unused596] +[unused597] +[unused598] +[unused599] +[unused600] +[unused601] +[unused602] +[unused603] +[unused604] +[unused605] +[unused606] +[unused607] +[unused608] +[unused609] +[unused610] +[unused611] +[unused612] +[unused613] +[unused614] +[unused615] +[unused616] +[unused617] +[unused618] +[unused619] +[unused620] +[unused621] +[unused622] +[unused623] +[unused624] +[unused625] +[unused626] +[unused627] +[unused628] +[unused629] +[unused630] +[unused631] +[unused632] +[unused633] +[unused634] +[unused635] +[unused636] +[unused637] +[unused638] +[unused639] +[unused640] +[unused641] +[unused642] +[unused643] +[unused644] +[unused645] +[unused646] +[unused647] +[unused648] +[unused649] +[unused650] +[unused651] +[unused652] +[unused653] +[unused654] +[unused655] +[unused656] +[unused657] +[unused658] +[unused659] +[unused660] +[unused661] +[unused662] +[unused663] +[unused664] +[unused665] +[unused666] +[unused667] +[unused668] +[unused669] +[unused670] +[unused671] +[unused672] +[unused673] +[unused674] +[unused675] +[unused676] +[unused677] +[unused678] +[unused679] +[unused680] +[unused681] +[unused682] +[unused683] +[unused684] +[unused685] +[unused686] +[unused687] +[unused688] +[unused689] +[unused690] +[unused691] +[unused692] +[unused693] +[unused694] +[unused695] +[unused696] +[unused697] +[unused698] +[unused699] +[unused700] +[unused701] +[unused702] +[unused703] +[unused704] +[unused705] +[unused706] +[unused707] +[unused708] +[unused709] +[unused710] +[unused711] +[unused712] +[unused713] +[unused714] +[unused715] +[unused716] +[unused717] +[unused718] +[unused719] +[unused720] +[unused721] +[unused722] +[unused723] +[unused724] +[unused725] +[unused726] +[unused727] +[unused728] +[unused729] +[unused730] +[unused731] +[unused732] 
+[unused733] +[unused734] +[unused735] +[unused736] +[unused737] +[unused738] +[unused739] +[unused740] +[unused741] +[unused742] +[unused743] +[unused744] +[unused745] +[unused746] +[unused747] +[unused748] +[unused749] +[unused750] +[unused751] +[unused752] +[unused753] +[unused754] +[unused755] +[unused756] +[unused757] +[unused758] +[unused759] +[unused760] +[unused761] +[unused762] +[unused763] +[unused764] +[unused765] +[unused766] +[unused767] +[unused768] +[unused769] +[unused770] +[unused771] +[unused772] +[unused773] +[unused774] +[unused775] +[unused776] +[unused777] +[unused778] +[unused779] +[unused780] +[unused781] +[unused782] +[unused783] +[unused784] +[unused785] +[unused786] +[unused787] +[unused788] +[unused789] +[unused790] +[unused791] +[unused792] +[unused793] +[unused794] +[unused795] +[unused796] +[unused797] +[unused798] +[unused799] +[unused800] +[unused801] +[unused802] +[unused803] +[unused804] +[unused805] +[unused806] +[unused807] +[unused808] +[unused809] +[unused810] +[unused811] +[unused812] +[unused813] +[unused814] +[unused815] +[unused816] +[unused817] +[unused818] +[unused819] +[unused820] +[unused821] +[unused822] +[unused823] +[unused824] +[unused825] +[unused826] +[unused827] +[unused828] +[unused829] +[unused830] +[unused831] +[unused832] +[unused833] +[unused834] +[unused835] +[unused836] +[unused837] +[unused838] +[unused839] +[unused840] +[unused841] +[unused842] +[unused843] +[unused844] +[unused845] +[unused846] +[unused847] +[unused848] +[unused849] +[unused850] +[unused851] +[unused852] +[unused853] +[unused854] +[unused855] +[unused856] +[unused857] +[unused858] +[unused859] +[unused860] +[unused861] +[unused862] +[unused863] +[unused864] +[unused865] +[unused866] +[unused867] +[unused868] +[unused869] +[unused870] +[unused871] +[unused872] +[unused873] +[unused874] +[unused875] +[unused876] +[unused877] +[unused878] +[unused879] +[unused880] +[unused881] +[unused882] +[unused883] +[unused884] +[unused885] +[unused886] +[unused887] +[unused888] +[unused889] +[unused890] +[unused891] +[unused892] +[unused893] +[unused894] +[unused895] +[unused896] +[unused897] +[unused898] +[unused899] +[unused900] +[unused901] +[unused902] +[unused903] +[unused904] +[unused905] +[unused906] +[unused907] +[unused908] +[unused909] +[unused910] +[unused911] +[unused912] +[unused913] +[unused914] +[unused915] +[unused916] +[unused917] +[unused918] +[unused919] +[unused920] +[unused921] +[unused922] +[unused923] +[unused924] +[unused925] +[unused926] +[unused927] +[unused928] +[unused929] +[unused930] +[unused931] +[unused932] +[unused933] +[unused934] +[unused935] +[unused936] +[unused937] +[unused938] +[unused939] +[unused940] +[unused941] +[unused942] +[unused943] +[unused944] +[unused945] +[unused946] +[unused947] +[unused948] +[unused949] +[unused950] +[unused951] +[unused952] +[unused953] +[unused954] +[unused955] +[unused956] +[unused957] +[unused958] +[unused959] +[unused960] +[unused961] +[unused962] +[unused963] +[unused964] +[unused965] +[unused966] +[unused967] +[unused968] +[unused969] +[unused970] +[unused971] +[unused972] +[unused973] +[unused974] +[unused975] +[unused976] +[unused977] +[unused978] +[unused979] +[unused980] +[unused981] +[unused982] +[unused983] +[unused984] +[unused985] +[unused986] +[unused987] +[unused988] +[unused989] +[unused990] +[unused991] +[unused992] +[unused993] +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? 
+@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¡ +¢ +£ +¤ +¥ +¦ +§ +¨ +© +ª +« +¬ +® +° +± +² +³ +´ +µ +¶ +· +¹ +º +» +¼ +½ +¾ +¿ +× +ß +æ +ð +÷ +ø +þ +đ +ħ +ı +ł +ŋ +œ +ƒ +ɐ +ɑ +ɒ +ɔ +ɕ +ə +ɛ +ɡ +ɣ +ɨ +ɪ +ɫ +ɬ +ɯ +ɲ +ɴ +ɹ +ɾ +ʀ +ʁ +ʂ +ʃ +ʉ +ʊ +ʋ +ʌ +ʎ +ʐ +ʑ +ʒ +ʔ +ʰ +ʲ +ʳ +ʷ +ʸ +ʻ +ʼ +ʾ +ʿ +ˈ +ː +ˡ +ˢ +ˣ +ˤ +α +β +γ +δ +ε +ζ +η +θ +ι +κ +λ +μ +ν +ξ +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +ђ +є +і +ј +љ +њ +ћ +ӏ +ա +բ +գ +դ +ե +թ +ի +լ +կ +հ +մ +յ +ն +ո +պ +ս +վ +տ +ր +ւ +ք +־ +א +ב +ג +ד +ה +ו +ז +ח +ט +י +ך +כ +ל +ם +מ +ן +נ +ס +ע +ף +פ +ץ +צ +ק +ר +ש +ת +، +ء +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +ز +س +ش +ص +ض +ط +ظ +ع +غ +ـ +ف +ق +ك +ل +م +ن +ه +و +ى +ي +ٹ +پ +چ +ک +گ +ں +ھ +ہ +ی +ے +अ +आ +उ +ए +क +ख +ग +च +ज +ट +ड +ण +त +थ +द +ध +न +प +ब +भ +म +य +र +ल +व +श +ष +स +ह +ा +ि +ी +ो +। +॥ +ং +অ +আ +ই +উ +এ +ও +ক +খ +গ +চ +ছ +জ +ট +ড +ণ +ত +থ +দ +ধ +ন +প +ব +ভ +ম +য +র +ল +শ +ষ +স +হ +া +ি +ী +ে +க +ச +ட +த +ந +ன +ப +ம +ய +ர +ல +ள +வ +ா +ி +ு +ே +ை +ನ +ರ +ಾ +ක +ය +ර +ල +ව +ා +ก +ง +ต +ท +น +พ +ม +ย +ร +ล +ว +ส +อ +า +เ +་ +། +ག +ང +ད +ན +པ +བ +མ +འ +ར +ལ +ས +မ +ა +ბ +გ +დ +ე +ვ +თ +ი +კ +ლ +მ +ნ +ო +რ +ს +ტ +უ +ᄀ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄉ +ᄊ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅥ +ᅦ +ᅧ +ᅩ +ᅪ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆼ +ᴬ +ᴮ +ᴰ +ᴵ +ᴺ +ᵀ +ᵃ +ᵇ +ᵈ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵖ +ᵗ +ᵘ +ᵢ +ᵣ +ᵤ +ᵥ +ᶜ +ᶠ +‐ +‑ +‒ +– +— +― +‖ +‘ +’ +‚ +“ +” +„ +† +‡ +• +… +‰ +′ +″ +› +‿ +⁄ +⁰ +ⁱ +⁴ +⁵ +⁶ +⁷ +⁸ +⁹ +⁺ +⁻ +ⁿ +₀ +₁ +₂ +₃ +₄ +₅ +₆ +₇ +₈ +₉ +₊ +₍ +₎ +ₐ +ₑ +ₒ +ₓ +ₕ +ₖ +ₗ +ₘ +ₙ +ₚ +ₛ +ₜ +₤ +₩ +€ +₱ +₹ +ℓ +№ +ℝ +™ +⅓ +⅔ +← +↑ +→ +↓ +↔ +↦ +⇄ +⇌ +⇒ +∂ +∅ +∆ +∇ +∈ +− +∗ +∘ +√ +∞ +∧ +∨ +∩ +∪ +≈ +≡ +≤ +≥ +⊂ +⊆ +⊕ +⊗ +⋅ +─ +│ +■ +▪ +● +★ +☆ +☉ +♠ +♣ +♥ +♦ +♭ +♯ +⟨ +⟩ +ⱼ +⺩ +⺼ +⽥ +、 +。 +〈 +〉 +《 +》 +「 +」 +『 +』 +〜 +あ +い +う +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +や +ゆ +よ +ら +り +る +れ +ろ +を +ん +ァ +ア +ィ +イ +ウ +ェ +エ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ュ +ョ +ラ +リ +ル +レ +ロ +ワ +ン +・ +ー +一 +三 +上 +下 +不 +世 +中 +主 +久 +之 +也 +事 +二 +五 +井 +京 +人 +亻 +仁 +介 +代 +仮 +伊 +会 +佐 +侍 +保 +信 +健 +元 +光 +八 +公 +内 +出 +分 +前 +劉 +力 +加 +勝 +北 +区 +十 +千 +南 +博 +原 +口 +古 +史 +司 +合 +吉 +同 +名 +和 +囗 +四 +国 +國 +土 +地 +坂 +城 +堂 +場 +士 +夏 +外 +大 +天 +太 +夫 +奈 +女 +子 +学 +宀 +宇 +安 +宗 +定 +宣 +宮 +家 +宿 +寺 +將 +小 +尚 +山 +岡 +島 +崎 +川 +州 +巿 +帝 +平 +年 +幸 +广 +弘 +張 +彳 +後 +御 +德 +心 +忄 +志 +忠 +愛 +成 +我 +戦 +戸 +手 +扌 +政 +文 +新 +方 +日 +明 +星 +春 +昭 +智 +曲 +書 +月 +有 +朝 +木 +本 +李 +村 +東 +松 +林 +森 +楊 +樹 +橋 +歌 +止 +正 +武 +比 +氏 +民 +水 +氵 +氷 +永 +江 +沢 +河 +治 +法 +海 +清 +漢 +瀬 +火 +版 +犬 +王 +生 +田 +男 +疒 +発 +白 +的 +皇 +目 +相 +省 +真 +石 +示 +社 +神 +福 +禾 +秀 +秋 +空 +立 +章 +竹 +糹 +美 +義 +耳 +良 +艹 +花 +英 +華 +葉 +藤 +行 +街 +西 +見 +訁 +語 +谷 +貝 +貴 +車 +軍 +辶 +道 +郎 +郡 +部 +都 +里 +野 +金 +鈴 +镇 +長 +門 +間 +阝 +阿 +陳 +陽 +雄 +青 +面 +風 +食 +香 +馬 +高 +龍 +龸 +fi +fl +! +( +) +, +- +. +/ +: +? 
+~ +the +of +and +in +to +was +he +is +as +for +on +with +that +it +his +by +at +from +her +##s +she +you +had +an +were +but +be +this +are +not +my +they +one +which +or +have +him +me +first +all +also +their +has +up +who +out +been +when +after +there +into +new +two +its +##a +time +would +no +what +about +said +we +over +then +other +so +more +##e +can +if +like +back +them +only +some +could +##i +where +just +##ing +during +before +##n +do +##o +made +school +through +than +now +years +most +world +may +between +down +well +three +##d +year +while +will +##ed +##r +##y +later +##t +city +under +around +did +such +being +used +state +people +part +know +against +your +many +second +university +both +national +##er +these +don +known +off +way +until +re +how +even +get +head +... +didn +##ly +team +american +because +de +##l +born +united +film +since +still +long +work +south +us +became +any +high +again +day +family +see +right +man +eyes +house +season +war +states +including +took +life +north +same +each +called +name +much +place +however +go +four +group +another +found +won +area +here +going +10 +away +series +left +home +music +best +make +hand +number +company +several +never +last +john +000 +very +album +take +end +good +too +following +released +game +played +little +began +district +##m +old +want +those +side +held +own +early +county +ll +league +use +west +##u +face +think +##es +2010 +government +##h +march +came +small +general +town +june +##on +line +based +something +##k +september +thought +looked +along +international +2011 +air +july +club +went +january +october +our +august +april +york +12 +few +2012 +2008 +east +show +member +college +2009 +father +public +##us +come +men +five +set +station +church +##c +next +former +november +room +party +located +december +2013 +age +got +2007 +##g +system +let +love +2006 +though +every +2014 +look +song +water +century +without +body +black +night +within +great +women +single +ve +building +large +population +river +named +band +white +started +##an +once +15 +20 +should +18 +2015 +service +top +built +british +open +death +king +moved +local +times +children +february +book +why +11 +door +need +president +order +final +road +wasn +although +due +major +died +village +third +knew +2016 +asked +turned +st +wanted +say +##p +together +received +main +son +served +different +##en +behind +himself +felt +members +power +football +law +voice +play +##in +near +park +history +30 +having +2005 +16 +##man +saw +mother +##al +army +point +front +help +english +street +art +late +hands +games +award +##ia +young +14 +put +published +country +division +across +told +13 +often +ever +french +london +center +six +red +2017 +led +days +include +light +25 +find +tell +among +species +really +according +central +half +2004 +form +original +gave +office +making +enough +lost +full +opened +must +included +live +given +german +player +run +business +woman +community +cup +might +million +land +2000 +court +development +17 +short +round +ii +km +seen +class +story +always +become +sure +research +almost +director +council +la +##2 +career +things +using +island +##z +couldn +car +##is +24 +close +force +##1 +better +free +support +control +field +students +2003 +education +married +##b +nothing +worked +others +record +big +inside +level +anything +continued +give +james +##3 +military +established +non +returned +feel +does +title +written +thing +feet +william +far +co +association +hard +already +2002 +##ra +championship 
+human +western +100 +##na +department +hall +role +various +production +21 +19 +heart +2001 +living +fire +version +##ers +##f +television +royal +##4 +produced +working +act +case +society +region +present +radio +period +looking +least +total +keep +england +wife +program +per +brother +mind +special +22 +##le +am +works +soon +##6 +political +george +services +taken +created +##7 +further +able +reached +david +union +joined +upon +done +important +social +information +either +##ic +##x +appeared +position +ground +lead +rock +dark +election +23 +board +france +hair +course +arms +site +police +girl +instead +real +sound +##v +words +moment +##te +someone +##8 +summer +project +announced +san +less +wrote +past +followed +##5 +blue +founded +al +finally +india +taking +records +america +##ne +1999 +design +considered +northern +god +stop +battle +toward +european +outside +described +track +today +playing +language +28 +call +26 +heard +professional +low +australia +miles +california +win +yet +green +##ie +trying +blood +##ton +southern +science +maybe +everything +match +square +27 +mouth +video +race +recorded +leave +above +##9 +daughter +points +space +1998 +museum +change +middle +common +##0 +move +tv +post +##ta +lake +seven +tried +elected +closed +ten +paul +minister +##th +months +start +chief +return +canada +person +sea +release +similar +modern +brought +rest +hit +formed +mr +##la +1997 +floor +event +doing +thomas +1996 +robert +care +killed +training +star +week +needed +turn +finished +railway +rather +news +health +sent +example +ran +term +michael +coming +currently +yes +forces +despite +gold +areas +50 +stage +fact +29 +dead +says +popular +2018 +originally +germany +probably +developed +result +pulled +friend +stood +money +running +mi +signed +word +songs +child +eventually +met +tour +average +teams +minutes +festival +current +deep +kind +1995 +decided +usually +eastern +seemed +##ness +episode +bed +added +table +indian +private +charles +route +available +idea +throughout +centre +addition +appointed +style +1994 +books +eight +construction +press +mean +wall +friends +remained +schools +study +##ch +##um +institute +oh +chinese +sometimes +events +possible +1992 +australian +type +brown +forward +talk +process +food +debut +seat +performance +committee +features +character +arts +herself +else +lot +strong +russian +range +hours +peter +arm +##da +morning +dr +sold +##ry +quickly +directed +1993 +guitar +china +##w +31 +list +##ma +performed +media +uk +players +smile +##rs +myself +40 +placed +coach +province +towards +wouldn +leading +whole +boy +official +designed +grand +census +##el +europe +attack +japanese +henry +1991 +##re +##os +cross +getting +alone +action +lower +network +wide +washington +japan +1990 +hospital +believe +changed +sister +##ar +hold +gone +sir +hadn +ship +##ka +studies +academy +shot +rights +below +base +bad +involved +kept +largest +##ist +bank +future +especially +beginning +mark +movement +section +female +magazine +plan +professor +lord +longer +##ian +sat +walked +hill +actually +civil +energy +model +families +size +thus +aircraft +completed +includes +data +captain +##or +fight +vocals +featured +richard +bridge +fourth +1989 +officer +stone +hear +##ism +means +medical +groups +management +self +lips +competition +entire +lived +technology +leaving +federal +tournament +bit +passed +hot +independent +awards +kingdom +mary +spent +fine +doesn +reported +##ling +jack +fall +raised +itself +stay +true +studio +1988 
+sports +replaced +paris +systems +saint +leader +theatre +whose +market +capital +parents +spanish +canadian +earth +##ity +cut +degree +writing +bay +christian +awarded +natural +higher +bill +##as +coast +provided +previous +senior +ft +valley +organization +stopped +onto +countries +parts +conference +queen +security +interest +saying +allowed +master +earlier +phone +matter +smith +winning +try +happened +moving +campaign +los +##ley +breath +nearly +mid +1987 +certain +girls +date +italian +african +standing +fell +artist +##ted +shows +deal +mine +industry +1986 +##ng +everyone +republic +provide +collection +library +student +##ville +primary +owned +older +via +heavy +1st +makes +##able +attention +anyone +africa +##ri +stated +length +ended +fingers +command +staff +skin +foreign +opening +governor +okay +medal +kill +sun +cover +job +1985 +introduced +chest +hell +feeling +##ies +success +meet +reason +standard +meeting +novel +1984 +trade +source +buildings +##land +rose +guy +goal +##ur +chapter +native +husband +previously +unit +limited +entered +weeks +producer +operations +mountain +takes +covered +forced +related +roman +complete +successful +key +texas +cold +##ya +channel +1980 +traditional +films +dance +clear +approximately +500 +nine +van +prince +question +active +tracks +ireland +regional +silver +author +personal +sense +operation +##ine +economic +1983 +holding +twenty +isbn +additional +speed +hour +edition +regular +historic +places +whom +shook +movie +km² +secretary +prior +report +chicago +read +foundation +view +engine +scored +1982 +units +ask +airport +property +ready +immediately +lady +month +listed +contract +##de +manager +themselves +lines +##ki +navy +writer +meant +##ts +runs +##ro +practice +championships +singer +glass +commission +required +forest +starting +culture +generally +giving +access +attended +test +couple +stand +catholic +martin +caught +executive +##less +eye +##ey +thinking +chair +quite +shoulder +1979 +hope +decision +plays +defeated +municipality +whether +structure +offered +slowly +pain +ice +direction +##ion +paper +mission +1981 +mostly +200 +noted +individual +managed +nature +lives +plant +##ha +helped +except +studied +computer +figure +relationship +issue +significant +loss +die +smiled +gun +ago +highest +1972 +##am +male +bring +goals +mexico +problem +distance +commercial +completely +location +annual +famous +drive +1976 +neck +1978 +surface +caused +italy +understand +greek +highway +wrong +hotel +comes +appearance +joseph +double +issues +musical +companies +castle +income +review +assembly +bass +initially +parliament +artists +experience +1974 +particular +walk +foot +engineering +talking +window +dropped +##ter +miss +baby +boys +break +1975 +stars +edge +remember +policy +carried +train +stadium +bar +sex +angeles +evidence +##ge +becoming +assistant +soviet +1977 +upper +step +wing +1970 +youth +financial +reach +##ll +actor +numerous +##se +##st +nodded +arrived +##ation +minute +##nt +believed +sorry +complex +beautiful +victory +associated +temple +1968 +1973 +chance +perhaps +metal +##son +1945 +bishop +##et +lee +launched +particularly +tree +le +retired +subject +prize +contains +yeah +theory +empire +##ce +suddenly +waiting +trust +recording +##to +happy +terms +camp +champion +1971 +religious +pass +zealand +names +2nd +port +ancient +tom +corner +represented +watch +legal +anti +justice +cause +watched +brothers +45 +material +changes +simply +response +louis +fast +##ting +answer +60 +historical 
+1969 +stories +straight +create +feature +increased +rate +administration +virginia +el +activities +cultural +overall +winner +programs +basketball +legs +guard +beyond +cast +doctor +mm +flight +results +remains +cost +effect +winter +##ble +larger +islands +problems +chairman +grew +commander +isn +1967 +pay +failed +selected +hurt +fort +box +regiment +majority +journal +35 +edward +plans +##ke +##ni +shown +pretty +irish +characters +directly +scene +likely +operated +allow +spring +##j +junior +matches +looks +mike +houses +fellow +##tion +beach +marriage +##ham +##ive +rules +oil +65 +florida +expected +nearby +congress +sam +peace +recent +iii +wait +subsequently +cell +##do +variety +serving +agreed +please +poor +joe +pacific +attempt +wood +democratic +piece +prime +##ca +rural +mile +touch +appears +township +1964 +1966 +soldiers +##men +##ized +1965 +pennsylvania +closer +fighting +claimed +score +jones +physical +editor +##ous +filled +genus +specific +sitting +super +mom +##va +therefore +supported +status +fear +cases +store +meaning +wales +minor +spain +tower +focus +vice +frank +follow +parish +separate +golden +horse +fifth +remaining +branch +32 +presented +stared +##id +uses +secret +forms +##co +baseball +exactly +##ck +choice +note +discovered +travel +composed +truth +russia +ball +color +kiss +dad +wind +continue +ring +referred +numbers +digital +greater +##ns +metres +slightly +direct +increase +1960 +responsible +crew +rule +trees +troops +##no +broke +goes +individuals +hundred +weight +creek +sleep +memory +defense +provides +ordered +code +value +jewish +windows +1944 +safe +judge +whatever +corps +realized +growing +pre +##ga +cities +alexander +gaze +lies +spread +scott +letter +showed +situation +mayor +transport +watching +workers +extended +##li +expression +normal +##ment +chart +multiple +border +##ba +host +##ner +daily +mrs +walls +piano +##ko +heat +cannot +##ate +earned +products +drama +era +authority +seasons +join +grade +##io +sign +difficult +machine +1963 +territory +mainly +##wood +stations +squadron +1962 +stepped +iron +19th +##led +serve +appear +sky +speak +broken +charge +knowledge +kilometres +removed +ships +article +campus +simple +##ty +pushed +britain +##ve +leaves +recently +cd +soft +boston +latter +easy +acquired +poland +##sa +quality +officers +presence +planned +nations +mass +broadcast +jean +share +image +influence +wild +offer +emperor +electric +reading +headed +ability +promoted +yellow +ministry +1942 +throat +smaller +politician +##by +latin +spoke +cars +williams +males +lack +pop +80 +##ier +acting +seeing +consists +##ti +estate +1961 +pressure +johnson +newspaper +jr +chris +olympics +online +conditions +beat +elements +walking +vote +##field +needs +carolina +text +featuring +global +block +shirt +levels +francisco +purpose +females +et +dutch +duke +ahead +gas +twice +safety +serious +turning +highly +lieutenant +firm +maria +amount +mixed +daniel +proposed +perfect +agreement +affairs +3rd +seconds +contemporary +paid +1943 +prison +save +kitchen +label +administrative +intended +constructed +academic +nice +teacher +races +1956 +formerly +corporation +ben +nation +issued +shut +1958 +drums +housing +victoria +seems +opera +1959 +graduated +function +von +mentioned +picked +build +recognized +shortly +protection +picture +notable +exchange +elections +1980s +loved +percent +racing +fish +elizabeth +garden +volume +hockey +1941 +beside +settled +##ford +1940 +competed +replied +drew +1948 +actress +marine 
+scotland +steel +glanced +farm +steve +1957 +risk +tonight +positive +magic +singles +effects +gray +screen +dog +##ja +residents +bus +sides +none +secondary +literature +polish +destroyed +flying +founder +households +1939 +lay +reserve +usa +gallery +##ler +1946 +industrial +younger +approach +appearances +urban +ones +1950 +finish +avenue +powerful +fully +growth +page +honor +jersey +projects +advanced +revealed +basic +90 +infantry +pair +equipment +visit +33 +evening +search +grant +effort +solo +treatment +buried +republican +primarily +bottom +owner +1970s +israel +gives +jim +dream +bob +remain +spot +70 +notes +produce +champions +contact +ed +soul +accepted +ways +del +##ally +losing +split +price +capacity +basis +trial +questions +##ina +1955 +20th +guess +officially +memorial +naval +initial +##ization +whispered +median +engineer +##ful +sydney +##go +columbia +strength +300 +1952 +tears +senate +00 +card +asian +agent +1947 +software +44 +draw +warm +supposed +com +pro +##il +transferred +leaned +##at +candidate +escape +mountains +asia +potential +activity +entertainment +seem +traffic +jackson +murder +36 +slow +product +orchestra +haven +agency +bbc +taught +website +comedy +unable +storm +planning +albums +rugby +environment +scientific +grabbed +protect +##hi +boat +typically +1954 +1953 +damage +principal +divided +dedicated +mount +ohio +##berg +pick +fought +driver +##der +empty +shoulders +sort +thank +berlin +prominent +account +freedom +necessary +efforts +alex +headquarters +follows +alongside +des +simon +andrew +suggested +operating +learning +steps +1949 +sweet +technical +begin +easily +34 +teeth +speaking +settlement +scale +##sh +renamed +ray +max +enemy +semi +joint +compared +##rd +scottish +leadership +analysis +offers +georgia +pieces +captured +animal +deputy +guest +organized +##lin +tony +combined +method +challenge +1960s +huge +wants +battalion +sons +rise +crime +types +facilities +telling +path +1951 +platform +sit +1990s +##lo +tells +assigned +rich +pull +##ot +commonly +alive +##za +letters +concept +conducted +wearing +happen +bought +becomes +holy +gets +ocean +defeat +languages +purchased +coffee +occurred +titled +##q +declared +applied +sciences +concert +sounds +jazz +brain +##me +painting +fleet +tax +nick +##ius +michigan +count +animals +leaders +episodes +##line +content +##den +birth +##it +clubs +64 +palace +critical +refused +fair +leg +laughed +returning +surrounding +participated +formation +lifted +pointed +connected +rome +medicine +laid +taylor +santa +powers +adam +tall +shared +focused +knowing +yards +entrance +falls +##wa +calling +##ad +sources +chosen +beneath +resources +yard +##ite +nominated +silence +zone +defined +##que +gained +thirty +38 +bodies +moon +##ard +adopted +christmas +widely +register +apart +iran +premier +serves +du +unknown +parties +##les +generation +##ff +continues +quick +fields +brigade +quiet +teaching +clothes +impact +weapons +partner +flat +theater +supreme +1938 +37 +relations +##tor +plants +suffered +1936 +wilson +kids +begins +##age +1918 +seats +armed +internet +models +worth +laws +400 +communities +classes +background +knows +thanks +quarter +reaching +humans +carry +killing +format +kong +hong +setting +75 +architecture +disease +railroad +inc +possibly +wish +arthur +thoughts +harry +doors +density +##di +crowd +illinois +stomach +tone +unique +reports +anyway +##ir +liberal +der +vehicle +thick +dry +drug +faced +largely +facility +theme +holds +creation +strange +colonel +##mi 
[vocabulary diff continues: thousands of added lines of a BERT-style WordPiece vocabulary, one "+token" per line — frequency-ordered English words (revolution, bell, politics, ...), years and round numbers (1937, 700, ...), and ##-prefixed subword continuations (##est, ##ing, ##₂, ...) — collapsed here]
+jacobs +carlton +abundant +stereo +boost +madras +inning +##hia +spur +ip +malayalam +begged +osaka +groan +escaping +charging +dose +vista +##aj +bud +papa +communists +advocates +edged +tri +##cent +resemble +peaking +necklace +fried +montenegro +saxony +goose +glances +stuttgart +curator +recruit +grocery +sympathetic +##tting +##fort +127 +lotus +randolph +ancestor +##rand +succeeding +jupiter +1798 +macedonian +##heads +hiking +1808 +handing +fischer +##itive +garbage +node +##pies +prone +singular +papua +inclined +attractions +italia +pouring +motioned +grandma +garnered +jacksonville +corp +ego +ringing +aluminum +##hausen +ordering +##foot +drawer +traders +synagogue +##play +##kawa +resistant +wandering +fragile +fiona +teased +var +hardcore +soaked +jubilee +decisive +exposition +mercer +poster +valencia +hale +kuwait +1811 +##ises +##wr +##eed +tavern +gamma +122 +johan +##uer +airways +amino +gil +##ury +vocational +domains +torres +##sp +generator +folklore +outcomes +##keeper +canberra +shooter +fl +beams +confrontation +##lling +##gram +feb +aligned +forestry +pipeline +jax +motorway +conception +decay +##tos +coffin +##cott +stalin +1805 +escorted +minded +##nam +sitcom +purchasing +twilight +veronica +additions +passive +tensions +straw +123 +frequencies +1804 +refugee +cultivation +##iate +christie +clary +bulletin +crept +disposal +##rich +##zong +processor +crescent +##rol +bmw +emphasized +whale +nazis +aurora +##eng +dwelling +hauled +sponsors +toledo +mega +ideology +theatres +tessa +cerambycidae +saves +turtle +cone +suspects +kara +rusty +yelling +greeks +mozart +shades +cocked +participant +##tro +shire +spit +freeze +necessity +##cos +inmates +nielsen +councillors +loaned +uncommon +omar +peasants +botanical +offspring +daniels +formations +jokes +1794 +pioneers +sigma +licensing +##sus +wheelchair +polite +1807 +liquor +pratt +trustee +##uta +forewings +balloon +##zz +kilometre +camping +explicit +casually +shawn +foolish +teammates +nm +hassan +carrie +judged +satisfy +vanessa +knives +selective +cnn +flowed +##lice +eclipse +stressed +eliza +mathematician +cease +cultivated +##roy +commissions +browns +##ania +destroyers +sheridan +meadow +##rius +minerals +##cial +downstream +clash +gram +memoirs +ventures +baha +seymour +archie +midlands +edith +fare +flynn +invite +canceled +tiles +stabbed +boulder +incorporate +amended +camden +facial +mollusk +unreleased +descriptions +yoga +grabs +550 +raises +ramp +shiver +##rose +coined +pioneering +tunes +qing +warwick +tops +119 +melanie +giles +##rous +wandered +##inal +annexed +nov +30th +unnamed +##ished +organizational +airplane +normandy +stoke +whistle +blessing +violations +chased +holders +shotgun +##ctic +outlet +reactor +##vik +tires +tearing +shores +fortified +mascot +constituencies +nc +columnist +productive +tibet +##rta +lineage +hooked +oct +tapes +judging +cody +##gger +hansen +kashmir +triggered +##eva +solved +cliffs +##tree +resisted +anatomy +protesters +transparent +implied +##iga +injection +mattress +excluding +##mbo +defenses +helpless +devotion +##elli +growl +liberals +weber +phenomena +atoms +plug +##iff +mortality +apprentice +howe +convincing +aaa +swimmer +barber +leone +promptly +sodium +def +nowadays +arise +##oning +gloucester +corrected +dignity +norm +erie +##ders +elders +evacuated +sylvia +compression +##yar +hartford +pose +backpack +reasoning +accepts +24th +wipe +millimetres +marcel +##oda +dodgers +albion +1790 +overwhelmed +aerospace +oaks +1795 +showcase +acknowledge 
+recovering +nolan +ashe +hurts +geology +fashioned +disappearance +farewell +swollen +shrug +marquis +wimbledon +124 +rue +1792 +commemorate +reduces +experiencing +inevitable +calcutta +intel +##court +murderer +sticking +fisheries +imagery +bloom +280 +brake +##inus +gustav +hesitation +memorable +po +viral +beans +accidents +tunisia +antenna +spilled +consort +treatments +aye +perimeter +##gard +donation +hostage +migrated +banker +addiction +apex +lil +trout +##ously +conscience +##nova +rams +sands +genome +passionate +troubles +##lets +##set +amid +##ibility +##ret +higgins +exceed +vikings +##vie +payne +##zan +muscular +##ste +defendant +sucking +##wal +ibrahim +fuselage +claudia +vfl +europeans +snails +interval +##garh +preparatory +statewide +tasked +lacrosse +viktor +##lation +angola +##hra +flint +implications +employs +teens +patrons +stall +weekends +barriers +scrambled +nucleus +tehran +jenna +parsons +lifelong +robots +displacement +5000 +##bles +precipitation +##gt +knuckles +clutched +1802 +marrying +ecology +marx +accusations +declare +scars +kolkata +mat +meadows +bermuda +skeleton +finalists +vintage +crawl +coordinate +affects +subjected +orchestral +mistaken +##tc +mirrors +dipped +relied +260 +arches +candle +##nick +incorporating +wildly +fond +basilica +owl +fringe +rituals +whispering +stirred +feud +tertiary +slick +goat +honorable +whereby +skip +ricardo +stripes +parachute +adjoining +submerged +synthesizer +##gren +intend +positively +ninety +phi +beaver +partition +fellows +alexis +prohibition +carlisle +bizarre +fraternity +##bre +doubts +icy +cbc +aquatic +sneak +sonny +combines +airports +crude +supervised +spatial +merge +alfonso +##bic +corrupt +scan +undergo +##ams +disabilities +colombian +comparing +dolphins +perkins +##lish +reprinted +unanimous +bounced +hairs +underworld +midwest +semester +bucket +paperback +miniseries +coventry +demise +##leigh +demonstrations +sensor +rotating +yan +##hler +arrange +soils +##idge +hyderabad +labs +##dr +brakes +grandchildren +##nde +negotiated +rover +ferrari +continuation +directorate +augusta +stevenson +counterpart +gore +##rda +nursery +rican +ave +collectively +broadly +pastoral +repertoire +asserted +discovering +nordic +styled +fiba +cunningham +harley +middlesex +survives +tumor +tempo +zack +aiming +lok +urgent +##rade +##nto +devils +##ement +contractor +turin +##wl +##ool +bliss +repaired +simmons +moan +astronomical +cr +negotiate +lyric +1890s +lara +bred +clad +angus +pbs +##ience +engineered +posed +##lk +hernandez +possessions +elbows +psychiatric +strokes +confluence +electorate +lifts +campuses +lava +alps +##ep +##ution +##date +physicist +woody +##page +##ographic +##itis +juliet +reformation +sparhawk +320 +complement +suppressed +jewel +##½ +floated +##kas +continuity +sadly +##ische +inability +melting +scanning +paula +flour +judaism +safer +vague +##lm +solving +curb +##stown +financially +gable +bees +expired +miserable +cassidy +dominion +1789 +cupped +145 +robbery +facto +amos +warden +resume +tallest +marvin +ing +pounded +usd +declaring +gasoline +##aux +darkened +270 +650 +sophomore +##mere +erection +gossip +televised +risen +dial +##eu +pillars +##link +passages +profound +##tina +arabian +ashton +silicon +nail +##ead +##lated +##wer +##hardt +fleming +firearms +ducked +circuits +blows +waterloo +titans +##lina +atom +fireplace +cheshire +financed +activation +algorithms +##zzi +constituent +catcher +cherokee +partnerships +sexuality +platoon +tragic +vivian +guarded +whiskey 
+meditation +poetic +##late +##nga +##ake +porto +listeners +dominance +kendra +mona +chandler +factions +22nd +salisbury +attitudes +derivative +##ido +##haus +intake +paced +javier +illustrator +barrels +bias +cockpit +burnett +dreamed +ensuing +##anda +receptors +someday +hawkins +mattered +##lal +slavic +1799 +jesuit +cameroon +wasted +tai +wax +lowering +victorious +freaking +outright +hancock +librarian +sensing +bald +calcium +myers +tablet +announcing +barack +shipyard +pharmaceutical +##uan +greenwich +flush +medley +patches +wolfgang +pt +speeches +acquiring +exams +nikolai +##gg +hayden +kannada +##type +reilly +##pt +waitress +abdomen +devastated +capped +pseudonym +pharmacy +fulfill +paraguay +1796 +clicked +##trom +archipelago +syndicated +##hman +lumber +orgasm +rejection +clifford +lorraine +advent +mafia +rodney +brock +##ght +##used +##elia +cassette +chamberlain +despair +mongolia +sensors +developmental +upstream +##eg +##alis +spanning +165 +trombone +basque +seeded +interred +renewable +rhys +leapt +revision +molecule +##ages +chord +vicious +nord +shivered +23rd +arlington +debts +corpus +sunrise +bays +blackburn +centimetres +##uded +shuddered +gm +strangely +gripping +cartoons +isabelle +orbital +##ppa +seals +proving +##lton +refusal +strengthened +bust +assisting +baghdad +batsman +portrayal +mara +pushes +spears +og +##cock +reside +nathaniel +brennan +1776 +confirmation +caucus +##worthy +markings +yemen +nobles +ku +lazy +viewer +catalan +encompasses +sawyer +##fall +sparked +substances +patents +braves +arranger +evacuation +sergio +persuade +dover +tolerance +penguin +cum +jockey +insufficient +townships +occupying +declining +plural +processed +projection +puppet +flanders +introduces +liability +##yon +gymnastics +antwerp +taipei +hobart +candles +jeep +wes +observers +126 +chaplain +bundle +glorious +##hine +hazel +flung +sol +excavations +dumped +stares +sh +bangalore +triangular +icelandic +intervals +expressing +turbine +##vers +songwriting +crafts +##igo +jasmine +ditch +rite +##ways +entertaining +comply +sorrow +wrestlers +basel +emirates +marian +rivera +helpful +##some +caution +downward +networking +##atory +##tered +darted +genocide +emergence +replies +specializing +spokesman +convenient +unlocked +fading +augustine +concentrations +resemblance +elijah +investigator +andhra +##uda +promotes +bean +##rrell +fleeing +wan +simone +announcer +##ame +##bby +lydia +weaver +132 +residency +modification +##fest +stretches +##ast +alternatively +nat +lowe +lacks +##ented +pam +tile +concealed +inferior +abdullah +residences +tissues +vengeance +##ided +moisture +peculiar +groove +zip +bologna +jennings +ninja +oversaw +zombies +pumping +batch +livingston +emerald +installations +1797 +peel +nitrogen +rama +##fying +##star +schooling +strands +responding +werner +##ost +lime +casa +accurately +targeting +##rod +underway +##uru +hemisphere +lester +##yard +occupies +2d +griffith +angrily +reorganized +##owing +courtney +deposited +##dd +##30 +estadio +##ifies +dunn +exiled +##ying +checks +##combe +##о +##fly +successes +unexpectedly +blu +assessed +##flower +##ه +observing +sacked +spiders +kn +##tail +mu +nodes +prosperity +audrey +divisional +155 +broncos +tangled +adjust +feeds +erosion +paolo +surf +directory +snatched +humid +admiralty +screwed +gt +reddish +##nese +modules +trench +lamps +bind +leah +bucks +competes +##nz +##form +transcription +##uc +isles +violently +clutching +pga +cyclist +inflation +flats +ragged +unnecessary +##hian +stubborn 
+coordinated +harriet +baba +disqualified +330 +insect +wolfe +##fies +reinforcements +rocked +duel +winked +embraced +bricks +##raj +hiatus +defeats +pending +brightly +jealousy +##xton +##hm +##uki +lena +gdp +colorful +##dley +stein +kidney +##shu +underwear +wanderers +##haw +##icus +guardians +m³ +roared +habits +##wise +permits +gp +uranium +punished +disguise +bundesliga +elise +dundee +erotic +partisan +pi +collectors +float +individually +rendering +behavioral +bucharest +ser +hare +valerie +corporal +nutrition +proportional +##isa +immense +##kis +pavement +##zie +##eld +sutherland +crouched +1775 +##lp +suzuki +trades +endurance +operas +crosby +prayed +priory +rory +socially +##urn +gujarat +##pu +walton +cube +pasha +privilege +lennon +floods +thorne +waterfall +nipple +scouting +approve +##lov +minorities +voter +dwight +extensions +assure +ballroom +slap +dripping +privileges +rejoined +confessed +demonstrating +patriotic +yell +investor +##uth +pagan +slumped +squares +##cle +##kins +confront +bert +embarrassment +##aid +aston +urging +sweater +starr +yuri +brains +williamson +commuter +mortar +structured +selfish +exports +##jon +cds +##him +unfinished +##rre +mortgage +destinations +##nagar +canoe +solitary +buchanan +delays +magistrate +fk +##pling +motivation +##lier +##vier +recruiting +assess +##mouth +malik +antique +1791 +pius +rahman +reich +tub +zhou +smashed +airs +galway +xii +conditioning +honduras +discharged +dexter +##pf +lionel +129 +debates +lemon +tiffany +volunteered +dom +dioxide +procession +devi +sic +tremendous +advertisements +colts +transferring +verdict +hanover +decommissioned +utter +relate +pac +racism +##top +beacon +limp +similarity +terra +occurrence +ant +##how +becky +capt +updates +armament +richie +pal +##graph +halloween +mayo +##ssen +##bone +cara +serena +fcc +dolls +obligations +##dling +violated +lafayette +jakarta +exploitation +##ime +infamous +iconic +##lah +##park +kitty +moody +reginald +dread +spill +crystals +olivier +modeled +bluff +equilibrium +separating +notices +ordnance +extinction +onset +cosmic +attachment +sammy +expose +privy +anchored +##bil +abbott +admits +bending +baritone +emmanuel +policeman +vaughan +winged +climax +dresses +denny +polytechnic +mohamed +burmese +authentic +nikki +genetics +grandparents +homestead +gaza +postponed +metacritic +una +##sby +##bat +unstable +dissertation +##rial +##cian +curls +obscure +uncovered +bronx +praying +disappearing +##hoe +prehistoric +coke +turret +mutations +nonprofit +pits +monaco +##ي +##usion +prominently +dispatched +podium +##mir +uci +##uation +133 +fortifications +birthplace +kendall +##lby +##oll +preacher +rack +goodman +##rman +persistent +##ott +countless +jaime +recorder +lexington +persecution +jumps +renewal +wagons +##11 +crushing +##holder +decorations +##lake +abundance +wrath +laundry +£1 +garde +##rp +jeanne +beetles +peasant +##sl +splitting +caste +sergei +##rer +##ema +scripts +##ively +rub +satellites +##vor +inscribed +verlag +scrapped +gale +packages +chick +potato +slogan +kathleen +arabs +##culture +counterparts +reminiscent +choral +##tead +rand +retains +bushes +dane +accomplish +courtesy +closes +##oth +slaughter +hague +krakow +lawson +tailed +elias +ginger +##ttes +canopy +betrayal +rebuilding +turf +##hof +frowning +allegiance +brigades +kicks +rebuild +polls +alias +nationalism +td +rowan +audition +bowie +fortunately +recognizes +harp +dillon +horrified +##oro +renault +##tics +ropes +##α +presumed +rewarded +infrared +wiping 
+accelerated +illustration +##rid +presses +practitioners +badminton +##iard +detained +##tera +recognizing +relates +misery +##sies +##tly +reproduction +piercing +potatoes +thornton +esther +manners +hbo +##aan +ours +bullshit +ernie +perennial +sensitivity +illuminated +rupert +##jin +##iss +##ear +rfc +nassau +##dock +staggered +socialism +##haven +appointments +nonsense +prestige +sharma +haul +##tical +solidarity +gps +##ook +##rata +igor +pedestrian +##uit +baxter +tenants +wires +medication +unlimited +guiding +impacts +diabetes +##rama +sasha +pas +clive +extraction +131 +continually +constraints +##bilities +sonata +hunted +sixteenth +chu +planting +quote +mayer +pretended +abs +spat +##hua +ceramic +##cci +curtains +pigs +pitching +##dad +latvian +sore +dayton +##sted +##qi +patrols +slice +playground +##nted +shone +stool +apparatus +inadequate +mates +treason +##ija +desires +##liga +##croft +somalia +laurent +mir +leonardo +oracle +grape +obliged +chevrolet +thirteenth +stunning +enthusiastic +##ede +accounted +concludes +currents +basil +##kovic +drought +##rica +mai +##aire +shove +posting +##shed +pilgrimage +humorous +packing +fry +pencil +wines +smells +144 +marilyn +aching +newest +clung +bon +neighbours +sanctioned +##pie +mug +##stock +drowning +##mma +hydraulic +##vil +hiring +reminder +lilly +investigators +##ncies +sour +##eous +compulsory +packet +##rion +##graphic +##elle +cannes +##inate +depressed +##rit +heroic +importantly +theresa +##tled +conway +saturn +marginal +rae +##xia +corresponds +royce +pact +jasper +explosives +packaging +aluminium +##ttered +denotes +rhythmic +spans +assignments +hereditary +outlined +originating +sundays +lad +reissued +greeting +beatrice +##dic +pillar +marcos +plots +handbook +alcoholic +judiciary +avant +slides +extract +masculine +blur +##eum +##force +homage +trembled +owens +hymn +trey +omega +signaling +socks +accumulated +reacted +attic +theo +lining +angie +distraction +primera +talbot +##key +1200 +ti +creativity +billed +##hey +deacon +eduardo +identifies +proposition +dizzy +gunner +hogan +##yam +##pping +##hol +ja +##chan +jensen +reconstructed +##berger +clearance +darius +##nier +abe +harlem +plea +dei +circled +emotionally +notation +fascist +neville +exceeded +upwards +viable +ducks +##fo +workforce +racer +limiting +shri +##lson +possesses +1600 +kerr +moths +devastating +laden +disturbing +locking +##cture +gal +fearing +accreditation +flavor +aide +1870s +mountainous +##baum +melt +##ures +motel +texture +servers +soda +##mb +herd +##nium +erect +puzzled +hum +peggy +examinations +gould +testified +geoff +ren +devised +sacks +##law +denial +posters +grunted +cesar +tutor +ec +gerry +offerings +byrne +falcons +combinations +ct +incoming +pardon +rocking +26th +avengers +flared +mankind +seller +uttar +loch +nadia +stroking +exposing +##hd +fertile +ancestral +instituted +##has +noises +prophecy +taxation +eminent +vivid +pol +##bol +dart +indirect +multimedia +notebook +upside +displaying +adrenaline +referenced +geometric +##iving +progression +##ddy +blunt +announce +##far +implementing +##lav +aggression +liaison +cooler +cares +headache +plantations +gorge +dots +impulse +thickness +ashamed +averaging +kathy +obligation +precursor +137 +fowler +symmetry +thee +225 +hears +##rai +undergoing +ads +butcher +bowler +##lip +cigarettes +subscription +goodness +##ically +browne +##hos +##tech +kyoto +donor +##erty +damaging +friction +drifting +expeditions +hardened +prostitution +152 +fauna +blankets +claw 
+tossing +snarled +butterflies +recruits +investigative +coated +healed +138 +communal +hai +xiii +academics +boone +psychologist +restless +lahore +stephens +mba +brendan +foreigners +printer +##pc +ached +explode +27th +deed +scratched +dared +##pole +cardiac +1780 +okinawa +proto +commando +compelled +oddly +electrons +##base +replica +thanksgiving +##rist +sheila +deliberate +stafford +tidal +representations +hercules +ou +##path +##iated +kidnapping +lenses +##tling +deficit +samoa +mouths +consuming +computational +maze +granting +smirk +razor +fixture +ideals +inviting +aiden +nominal +##vs +issuing +julio +pitt +ramsey +docks +##oss +exhaust +##owed +bavarian +draped +anterior +mating +ethiopian +explores +noticing +##nton +discarded +convenience +hoffman +endowment +beasts +cartridge +mormon +paternal +probe +sleeves +interfere +lump +deadline +##rail +jenks +bulldogs +scrap +alternating +justified +reproductive +nam +seize +descending +secretariat +kirby +coupe +grouped +smash +panther +sedan +tapping +##18 +lola +cheer +germanic +unfortunate +##eter +unrelated +##fan +subordinate +##sdale +suzanne +advertisement +##ility +horsepower +##lda +cautiously +discourse +luigi +##mans +##fields +noun +prevalent +mao +schneider +everett +surround +governorate +kira +##avia +westward +##take +misty +rails +sustainability +134 +unused +##rating +packs +toast +unwilling +regulate +thy +suffrage +nile +awe +assam +definitions +travelers +affordable +##rb +conferred +sells +undefeated +beneficial +torso +basal +repeating +remixes +##pass +bahrain +cables +fang +##itated +excavated +numbering +statutory +##rey +deluxe +##lian +forested +ramirez +derbyshire +zeus +slamming +transfers +astronomer +banana +lottery +berg +histories +bamboo +##uchi +resurrection +posterior +bowls +vaguely +##thi +thou +preserving +tensed +offence +##inas +meyrick +callum +ridden +watt +langdon +tying +lowland +snorted +daring +truman +##hale +##girl +aura +overly +filing +weighing +goa +infections +philanthropist +saunders +eponymous +##owski +latitude +perspectives +reviewing +mets +commandant +radial +##kha +flashlight +reliability +koch +vowels +amazed +ada +elaine +supper +##rth +##encies +predator +debated +soviets +cola +##boards +##nah +compartment +crooked +arbitrary +fourteenth +##ctive +havana +majors +steelers +clips +profitable +ambush +exited +packers +##tile +nude +cracks +fungi +##е +limb +trousers +josie +shelby +tens +frederic +##ος +definite +smoothly +constellation +insult +baton +discs +lingering +##nco +conclusions +lent +staging +becker +grandpa +shaky +##tron +einstein +obstacles +sk +adverse +elle +economically +##moto +mccartney +thor +dismissal +motions +readings +nostrils +treatise +##pace +squeezing +evidently +prolonged +1783 +venezuelan +je +marguerite +beirut +takeover +shareholders +##vent +denise +digit +airplay +norse +##bbling +imaginary +pills +hubert +blaze +vacated +eliminating +##ello +vine +mansfield +##tty +retrospective +barrow +borne +clutch +bail +forensic +weaving +##nett +##witz +desktop +citadel +promotions +worrying +dorset +ieee +subdivided +##iating +manned +expeditionary +pickup +synod +chuckle +185 +barney +##rz +##ffin +functionality +karachi +litigation +meanings +uc +lick +turbo +anders +##ffed +execute +curl +oppose +ankles +typhoon +##د +##ache +##asia +linguistics +compassion +pressures +grazing +perfection +##iting +immunity +monopoly +muddy +backgrounds +136 +namibia +francesca +monitors +attracting +stunt +tuition +##ии +vegetable +##mates +##quent +mgm 
+jen +complexes +forts +##ond +cellar +bites +seventeenth +royals +flemish +failures +mast +charities +##cular +peruvian +capitals +macmillan +ipswich +outward +frigate +postgraduate +folds +employing +##ouse +concurrently +fiery +##tai +contingent +nightmares +monumental +nicaragua +##kowski +lizard +mal +fielding +gig +reject +##pad +harding +##ipe +coastline +##cin +##nos +beethoven +humphrey +innovations +##tam +##nge +norris +doris +solicitor +huang +obey +141 +##lc +niagara +##tton +shelves +aug +bourbon +curry +nightclub +specifications +hilton +##ndo +centennial +dispersed +worm +neglected +briggs +sm +font +kuala +uneasy +plc +##nstein +##bound +##aking +##burgh +awaiting +pronunciation +##bbed +##quest +eh +optimal +zhu +raped +greens +presided +brenda +worries +##life +venetian +marxist +turnout +##lius +refined +braced +sins +grasped +sunderland +nickel +speculated +lowell +cyrillic +communism +fundraising +resembling +colonists +mutant +freddie +usc +##mos +gratitude +##run +mural +##lous +chemist +wi +reminds +28th +steals +tess +pietro +##ingen +promoter +ri +microphone +honoured +rai +sant +##qui +feather +##nson +burlington +kurdish +terrorists +deborah +sickness +##wed +##eet +hazard +irritated +desperation +veil +clarity +##rik +jewels +xv +##gged +##ows +##cup +berkshire +unfair +mysteries +orchid +winced +exhaustion +renovations +stranded +obe +infinity +##nies +adapt +redevelopment +thanked +registry +olga +domingo +noir +tudor +ole +##atus +commenting +behaviors +##ais +crisp +pauline +probable +stirling +wigan +##bian +paralympics +panting +surpassed +##rew +luca +barred +pony +famed +##sters +cassandra +waiter +carolyn +exported +##orted +andres +destructive +deeds +jonah +castles +vacancy +suv +##glass +1788 +orchard +yep +famine +belarusian +sprang +##forth +skinny +##mis +administrators +rotterdam +zambia +zhao +boiler +discoveries +##ride +##physics +lucius +disappointing +outreach +spoon +##frame +qualifications +unanimously +enjoys +regency +##iidae +stade +realism +veterinary +rodgers +dump +alain +chestnut +castile +censorship +rumble +gibbs +##itor +communion +reggae +inactivated +logs +loads +##houses +homosexual +##iano +ale +informs +##cas +phrases +plaster +linebacker +ambrose +kaiser +fascinated +850 +limerick +recruitment +forge +mastered +##nding +leinster +rooted +threaten +##strom +borneo +##hes +suggestions +scholarships +propeller +documentaries +patronage +coats +constructing +invest +neurons +comet +entirety +shouts +identities +annoying +unchanged +wary +##antly +##ogy +neat +oversight +##kos +phillies +replay +constance +##kka +incarnation +humble +skies +minus +##acy +smithsonian +##chel +guerrilla +jar +cadets +##plate +surplus +audit +##aru +cracking +joanna +louisa +pacing +##lights +intentionally +##iri +diner +nwa +imprint +australians +tong +unprecedented +bunker +naive +specialists +ark +nichols +railing +leaked +pedal +##uka +shrub +longing +roofs +v8 +captains +neural +tuned +##ntal +##jet +emission +medina +frantic +codex +definitive +sid +abolition +intensified +stocks +enrique +sustain +genoa +oxide +##written +clues +cha +##gers +tributaries +fragment +venom +##rity +##ente +##sca +muffled +vain +sire +laos +##ingly +##hana +hastily +snapping +surfaced +sentiment +motive +##oft +contests +approximate +mesa +luckily +dinosaur +exchanges +propelled +accord +bourne +relieve +tow +masks +offended +##ues +cynthia +##mmer +rains +bartender +zinc +reviewers +lois +##sai +legged +arrogant +rafe +rosie +comprise +handicap +blockade 
+inlet +lagoon +copied +drilling +shelley +petals +##inian +mandarin +obsolete +##inated +onward +arguably +productivity +cindy +praising +seldom +busch +discusses +raleigh +shortage +ranged +stanton +encouragement +firstly +conceded +overs +temporal +##uke +cbe +##bos +woo +certainty +pumps +##pton +stalked +##uli +lizzie +periodic +thieves +weaker +##night +gases +shoving +chooses +wc +##chemical +prompting +weights +##kill +robust +flanked +sticky +hu +tuberculosis +##eb +##eal +christchurch +resembled +wallet +reese +inappropriate +pictured +distract +fixing +fiddle +giggled +burger +heirs +hairy +mechanic +torque +apache +obsessed +chiefly +cheng +logging +##tag +extracted +meaningful +numb +##vsky +gloucestershire +reminding +##bay +unite +##lit +breeds +diminished +clown +glove +1860s +##ن +##ug +archibald +focal +freelance +sliced +depiction +##yk +organism +switches +sights +stray +crawling +##ril +lever +leningrad +interpretations +loops +anytime +reel +alicia +delighted +##ech +inhaled +xiv +suitcase +bernie +vega +licenses +northampton +exclusion +induction +monasteries +racecourse +homosexuality +##right +##sfield +##rky +dimitri +michele +alternatives +ions +commentators +genuinely +objected +pork +hospitality +fencing +stephan +warships +peripheral +wit +drunken +wrinkled +quentin +spends +departing +chung +numerical +spokesperson +##zone +johannesburg +caliber +killers +##udge +assumes +neatly +demographic +abigail +bloc +##vel +mounting +##lain +bentley +slightest +xu +recipients +##jk +merlin +##writer +seniors +prisons +blinking +hindwings +flickered +kappa +##hel +80s +strengthening +appealing +brewing +gypsy +mali +lashes +hulk +unpleasant +harassment +bio +treaties +predict +instrumentation +pulp +troupe +boiling +mantle +##ffe +ins +##vn +dividing +handles +verbs +##onal +coconut +senegal +340 +thorough +gum +momentarily +##sto +cocaine +panicked +destined +##turing +teatro +denying +weary +captained +mans +##hawks +##code +wakefield +bollywood +thankfully +##16 +cyril +##wu +amendments +##bahn +consultation +stud +reflections +kindness +1787 +internally +##ovo +tex +mosaic +distribute +paddy +seeming +143 +##hic +piers +##15 +##mura +##verse +popularly +winger +kang +sentinel +mccoy +##anza +covenant +##bag +verge +fireworks +suppress +thrilled +dominate +##jar +swansea +##60 +142 +reconciliation +##ndi +stiffened +cue +dorian +##uf +damascus +amor +ida +foremost +##aga +porsche +unseen +dir +##had +##azi +stony +lexi +melodies +##nko +angular +integer +podcast +ants +inherent +jaws +justify +persona +##olved +josephine +##nr +##ressed +customary +flashes +gala +cyrus +glaring +backyard +ariel +physiology +greenland +html +stir +avon +atletico +finch +methodology +ked +##lent +mas +catholicism +townsend +branding +quincy +fits +containers +1777 +ashore +aragon +##19 +forearm +poisoning +##sd +adopting +conquer +grinding +amnesty +keller +finances +evaluate +forged +lankan +instincts +##uto +guam +bosnian +photographed +workplace +desirable +protector +##dog +allocation +intently +encourages +willy +##sten +bodyguard +electro +brighter +##ν +bihar +##chev +lasts +opener +amphibious +sal +verde +arte +##cope +captivity +vocabulary +yields +##tted +agreeing +desmond +pioneered +##chus +strap +campaigned +railroads +##ович +emblem +##dre +stormed +501 +##ulous +marijuana +northumberland +##gn +##nath +bowen +landmarks +beaumont +##qua +danube +##bler +attorneys +th +ge +flyers +critique +villains +cass +mutation +acc +##0s +colombo +mckay +motif +sampling +concluding 
+syndicate +##rell +neon +stables +ds +warnings +clint +mourning +wilkinson +##tated +merrill +leopard +evenings +exhaled +emil +sonia +ezra +discrete +stove +farrell +fifteenth +prescribed +superhero +##rier +worms +helm +wren +##duction +##hc +expo +##rator +hq +unfamiliar +antony +prevents +acceleration +fiercely +mari +painfully +calculations +cheaper +ign +clifton +irvine +davenport +mozambique +##np +pierced +##evich +wonders +##wig +##cate +##iling +crusade +ware +##uel +enzymes +reasonably +mls +##coe +mater +ambition +bunny +eliot +kernel +##fin +asphalt +headmaster +torah +aden +lush +pins +waived +##care +##yas +joao +substrate +enforce +##grad +##ules +alvarez +selections +epidemic +tempted +##bit +bremen +translates +ensured +waterfront +29th +forrest +manny +malone +kramer +reigning +cookies +simpler +absorption +205 +engraved +##ffy +evaluated +1778 +haze +146 +comforting +crossover +##abe +thorn +##rift +##imo +##pop +suppression +fatigue +cutter +##tr +201 +wurttemberg +##orf +enforced +hovering +proprietary +gb +samurai +syllable +ascent +lacey +tick +lars +tractor +merchandise +rep +bouncing +defendants +##yre +huntington +##ground +##oko +standardized +##hor +##hima +assassinated +nu +predecessors +rainy +liar +assurance +lyrical +##uga +secondly +flattened +ios +parameter +undercover +##mity +bordeaux +punish +ridges +markers +exodus +inactive +hesitate +debbie +nyc +pledge +savoy +nagar +offset +organist +##tium +hesse +marin +converting +##iver +diagram +propulsion +pu +validity +reverted +supportive +##dc +ministries +clans +responds +proclamation +##inae +##ø +##rea +ein +pleading +patriot +sf +birch +islanders +strauss +hates +##dh +brandenburg +concession +rd +##ob +1900s +killings +textbook +antiquity +cinematography +wharf +embarrassing +setup +creed +farmland +inequality +centred +signatures +fallon +370 +##ingham +##uts +ceylon +gazing +directive +laurie +##tern +globally +##uated +##dent +allah +excavation +threads +##cross +148 +frantically +icc +utilize +determines +respiratory +thoughtful +receptions +##dicate +merging +chandra +seine +147 +builders +builds +diagnostic +dev +visibility +goddamn +analyses +dhaka +cho +proves +chancel +concurrent +curiously +canadians +pumped +restoring +1850s +turtles +jaguar +sinister +spinal +traction +declan +vows +1784 +glowed +capitalism +swirling +install +universidad +##lder +##oat +soloist +##genic +##oor +coincidence +beginnings +nissan +dip +resorts +caucasus +combustion +infectious +##eno +pigeon +serpent +##itating +conclude +masked +salad +jew +##gr +surreal +toni +##wc +harmonica +151 +##gins +##etic +##coat +fishermen +intending +bravery +##wave +klaus +titan +wembley +taiwanese +ransom +40th +incorrect +hussein +eyelids +jp +cooke +dramas +utilities +##etta +##print +eisenhower +principally +granada +lana +##rak +openings +concord +##bl +bethany +connie +morality +sega +##mons +##nard +earnings +##kara +##cine +wii +communes +##rel +coma +composing +softened +severed +grapes +##17 +nguyen +analyzed +warlord +hubbard +heavenly +behave +slovenian +##hit +##ony +hailed +filmmakers +trance +caldwell +skye +unrest +coward +likelihood +##aging +bern +sci +taliban +honolulu +propose +##wang +1700 +browser +imagining +cobra +contributes +dukes +instinctively +conan +violinist +##ores +accessories +gradual +##amp +quotes +sioux +##dating +undertake +intercepted +sparkling +compressed +139 +fungus +tombs +haley +imposing +rests +degradation +lincolnshire +retailers +wetlands +tulsa +distributor +dungeon +nun 
+greenhouse +convey +atlantis +aft +exits +oman +dresser +lyons +##sti +joking +eddy +judgement +omitted +digits +##cts +##game +juniors +##rae +cents +stricken +une +##ngo +wizards +weir +breton +nan +technician +fibers +liking +royalty +##cca +154 +persia +terribly +magician +##rable +##unt +vance +cafeteria +booker +camille +warmer +##static +consume +cavern +gaps +compass +contemporaries +foyer +soothing +graveyard +maj +plunged +blush +##wear +cascade +demonstrates +ordinance +##nov +boyle +##lana +rockefeller +shaken +banjo +izzy +##ense +breathless +vines +##32 +##eman +alterations +chromosome +dwellings +feudal +mole +153 +catalonia +relics +tenant +mandated +##fm +fridge +hats +honesty +patented +raul +heap +cruisers +accusing +enlightenment +infants +wherein +chatham +contractors +zen +affinity +hc +osborne +piston +156 +traps +maturity +##rana +lagos +##zal +peering +##nay +attendant +dealers +protocols +subset +prospects +biographical +##cre +artery +##zers +insignia +nuns +endured +##eration +recommend +schwartz +serbs +berger +cromwell +crossroads +##ctor +enduring +clasped +grounded +##bine +marseille +twitched +abel +choke +https +catalyst +moldova +italians +##tist +disastrous +wee +##oured +##nti +wwf +nope +##piration +##asa +expresses +thumbs +167 +##nza +coca +1781 +cheating +##ption +skipped +sensory +heidelberg +spies +satan +dangers +semifinal +202 +bohemia +whitish +confusing +shipbuilding +relies +surgeons +landings +ravi +baku +moor +suffix +alejandro +##yana +litre +upheld +##unk +rajasthan +##rek +coaster +insists +posture +scenarios +etienne +favoured +appoint +transgender +elephants +poked +greenwood +defences +fulfilled +militant +somali +1758 +chalk +potent +##ucci +migrants +wink +assistants +nos +restriction +activism +niger +##ario +colon +shaun +##sat +daphne +##erated +swam +congregations +reprise +considerations +magnet +playable +xvi +##р +overthrow +tobias +knob +chavez +coding +##mers +propped +katrina +orient +newcomer +##suke +temperate +##pool +farmhouse +interrogation +##vd +committing +##vert +forthcoming +strawberry +joaquin +macau +ponds +shocking +siberia +##cellular +chant +contributors +##nant +##ologists +sped +absorb +hail +1782 +spared +##hore +barbados +karate +opus +originates +saul +##xie +evergreen +leaped +##rock +correlation +exaggerated +weekday +unification +bump +tracing +brig +afb +pathways +utilizing +##ners +mod +mb +disturbance +kneeling +##stad +##guchi +100th +pune +##thy +decreasing +168 +manipulation +miriam +academia +ecosystem +occupational +rbi +##lem +rift +##14 +rotary +stacked +incorporation +awakening +generators +guerrero +racist +##omy +cyber +derivatives +culminated +allie +annals +panzer +sainte +wikipedia +pops +zu +austro +##vate +algerian +politely +nicholson +mornings +educate +tastes +thrill +dartmouth +##gating +db +##jee +regan +differing +concentrating +choreography +divinity +##media +pledged +alexandre +routing +gregor +madeline +##idal +apocalypse +##hora +gunfire +culminating +elves +fined +liang +lam +programmed +tar +guessing +transparency +gabrielle +##gna +cancellation +flexibility +##lining +accession +shea +stronghold +nets +specializes +##rgan +abused +hasan +sgt +ling +exceeding +##₄ +admiration +supermarket +##ark +photographers +specialised +tilt +resonance +hmm +perfume +380 +sami +threatens +garland +botany +guarding +boiled +greet +puppy +russo +supplier +wilmington +vibrant +vijay +##bius +paralympic +grumbled +paige +faa +licking +margins +hurricanes +##gong +fest +grenade +ripping 
+##uz +counseling +weigh +##sian +needles +wiltshire +edison +costly +##not +fulton +tramway +redesigned +staffordshire +cache +gasping +watkins +sleepy +candidacy +##group +monkeys +timeline +throbbing +##bid +##sos +berth +uzbekistan +vanderbilt +bothering +overturned +ballots +gem +##iger +sunglasses +subscribers +hooker +compelling +ang +exceptionally +saloon +stab +##rdi +carla +terrifying +rom +##vision +coil +##oids +satisfying +vendors +31st +mackay +deities +overlooked +ambient +bahamas +felipe +olympia +whirled +botanist +advertised +tugging +##dden +disciples +morales +unionist +rites +foley +morse +motives +creepy +##₀ +soo +##sz +bargain +highness +frightening +turnpike +tory +reorganization +##cer +depict +biographer +##walk +unopposed +manifesto +##gles +institut +emile +accidental +kapoor +##dam +kilkenny +cortex +lively +##13 +romanesque +jain +shan +cannons +##ood +##ske +petrol +echoing +amalgamated +disappears +cautious +proposes +sanctions +trenton +##ر +flotilla +aus +contempt +tor +canary +cote +theirs +##hun +conceptual +deleted +fascinating +paso +blazing +elf +honourable +hutchinson +##eiro +##outh +##zin +surveyor +tee +amidst +wooded +reissue +intro +##ono +cobb +shelters +newsletter +hanson +brace +encoding +confiscated +dem +caravan +marino +scroll +melodic +cows +imam +##adi +##aneous +northward +searches +biodiversity +cora +310 +roaring +##bers +connell +theologian +halo +compose +pathetic +unmarried +dynamo +##oot +az +calculation +toulouse +deserves +humour +nr +forgiveness +tam +undergone +martyr +pamela +myths +whore +counselor +hicks +290 +heavens +battleship +electromagnetic +##bbs +stellar +establishments +presley +hopped +##chin +temptation +90s +wills +nas +##yuan +nhs +##nya +seminars +##yev +adaptations +gong +asher +lex +indicator +sikh +tobago +cites +goin +##yte +satirical +##gies +characterised +correspond +bubbles +lure +participates +##vid +eruption +skate +therapeutic +1785 +canals +wholesale +defaulted +sac +460 +petit +##zzled +virgil +leak +ravens +256 +portraying +##yx +ghetto +creators +dams +portray +vicente +##rington +fae +namesake +bounty +##arium +joachim +##ota +##iser +aforementioned +axle +snout +depended +dismantled +reuben +480 +##ibly +gallagher +##lau +##pd +earnest +##ieu +##iary +inflicted +objections +##llar +asa +gritted +##athy +jericho +##sea +##was +flick +underside +ceramics +undead +substituted +195 +eastward +undoubtedly +wheeled +chimney +##iche +guinness +cb +##ager +siding +##bell +traitor +baptiste +disguised +inauguration +149 +tipperary +choreographer +perched +warmed +stationary +eco +##ike +##ntes +bacterial +##aurus +flores +phosphate +##core +attacker +invaders +alvin +intersects +a1 +indirectly +immigrated +businessmen +cornelius +valves +narrated +pill +sober +ul +nationale +monastic +applicants +scenery +##jack +161 +motifs +constitutes +cpu +##osh +jurisdictions +sd +tuning +irritation +woven +##uddin +fertility +gao +##erie +antagonist +impatient +glacial +hides +boarded +denominations +interception +##jas +cookie +nicola +##tee +algebraic +marquess +bahn +parole +buyers +bait +turbines +paperwork +bestowed +natasha +renee +oceans +purchases +157 +vaccine +215 +##tock +fixtures +playhouse +integrate +jai +oswald +intellectuals +##cky +booked +nests +mortimer +##isi +obsession +sept +##gler +##sum +440 +scrutiny +simultaneous +squinted +##shin +collects +oven +shankar +penned +remarkably +##я +slips +luggage +spectral +1786 +collaborations +louie +consolidation +##ailed +##ivating +420 +hoover 
+blackpool +harness +ignition +vest +tails +belmont +mongol +skinner +##nae +visually +mage +derry +##tism +##unce +stevie +transitional +##rdy +redskins +drying +prep +prospective +##21 +annoyance +oversee +##loaded +fills +##books +##iki +announces +fda +scowled +respects +prasad +mystic +tucson +##vale +revue +springer +bankrupt +1772 +aristotle +salvatore +habsburg +##geny +dal +natal +nut +pod +chewing +darts +moroccan +walkover +rosario +lenin +punjabi +##ße +grossed +scattering +wired +invasive +hui +polynomial +corridors +wakes +gina +portrays +##cratic +arid +retreating +erich +irwin +sniper +##dha +linen +lindsey +maneuver +butch +shutting +socio +bounce +commemorative +postseason +jeremiah +pines +275 +mystical +beads +bp +abbas +furnace +bidding +consulted +assaulted +empirical +rubble +enclosure +sob +weakly +cancel +polly +yielded +##emann +curly +prediction +battered +70s +vhs +jacqueline +render +sails +barked +detailing +grayson +riga +sloane +raging +##yah +herbs +bravo +##athlon +alloy +giggle +imminent +suffers +assumptions +waltz +##itate +accomplishments +##ited +bathing +remixed +deception +prefix +##emia +deepest +##tier +##eis +balkan +frogs +##rong +slab +##pate +philosophers +peterborough +grains +imports +dickinson +rwanda +##atics +1774 +dirk +lan +tablets +##rove +clone +##rice +caretaker +hostilities +mclean +##gre +regimental +treasures +norms +impose +tsar +tango +diplomacy +variously +complain +192 +recognise +arrests +1779 +celestial +pulitzer +##dus +bing +libretto +##moor +adele +splash +##rite +expectation +lds +confronts +##izer +spontaneous +harmful +wedge +entrepreneurs +buyer +##ope +bilingual +translate +rugged +conner +circulated +uae +eaton +##gra +##zzle +lingered +lockheed +vishnu +reelection +alonso +##oom +joints +yankee +headline +cooperate +heinz +laureate +invading +##sford +echoes +scandinavian +##dham +hugging +vitamin +salute +micah +hind +trader +##sper +radioactive +##ndra +militants +poisoned +ratified +remark +campeonato +deprived +wander +prop +##dong +outlook +##tani +##rix +##eye +chiang +darcy +##oping +mandolin +spice +statesman +babylon +182 +walled +forgetting +afro +##cap +158 +giorgio +buffer +##polis +planetary +##gis +overlap +terminals +kinda +centenary +##bir +arising +manipulate +elm +ke +1770 +ak +##tad +chrysler +mapped +moose +pomeranian +quad +macarthur +assemblies +shoreline +recalls +stratford +##rted +noticeable +##evic +imp +##rita +##sque +accustomed +supplying +tents +disgusted +vogue +sipped +filters +khz +reno +selecting +luftwaffe +mcmahon +tyne +masterpiece +carriages +collided +dunes +exercised +flare +remembers +muzzle +##mobile +heck +##rson +burgess +lunged +middleton +boycott +bilateral +##sity +hazardous +lumpur +multiplayer +spotlight +jackets +goldman +liege +porcelain +rag +waterford +benz +attracts +hopeful +battling +ottomans +kensington +baked +hymns +cheyenne +lattice +levine +borrow +polymer +clashes +michaels +monitored +commitments +denounced +##25 +##von +cavity +##oney +hobby +akin +##holders +futures +intricate +cornish +patty +##oned +illegally +dolphin +##lag +barlow +yellowish +maddie +apologized +luton +plagued +##puram +nana +##rds +sway +fanny +łodz +##rino +psi +suspicions +hanged +##eding +initiate +charlton +##por +nak +competent +235 +analytical +annex +wardrobe +reservations +##rma +sect +162 +fairfax +hedge +piled +buckingham +uneven +bauer +simplicity +snyder +interpret +accountability +donors +moderately +byrd +continents +##cite +##max +disciple +hr +jamaican +ping 
+nominees +##uss +mongolian +diver +attackers +eagerly +ideological +pillows +miracles +apartheid +revolver +sulfur +clinics +moran +163 +##enko +ile +katy +rhetoric +##icated +chronology +recycling +##hrer +elongated +mughal +pascal +profiles +vibration +databases +domination +##fare +##rant +matthias +digest +rehearsal +polling +weiss +initiation +reeves +clinging +flourished +impress +ngo +##hoff +##ume +buckley +symposium +rhythms +weed +emphasize +transforming +##taking +##gence +##yman +accountant +analyze +flicker +foil +priesthood +voluntarily +decreases +##80 +##hya +slater +sv +charting +mcgill +##lde +moreno +##iu +besieged +zur +robes +##phic +admitting +api +deported +turmoil +peyton +earthquakes +##ares +nationalists +beau +clair +brethren +interrupt +welch +curated +galerie +requesting +164 +##ested +impending +steward +viper +##vina +complaining +beautifully +brandy +foam +nl +1660 +##cake +alessandro +punches +laced +explanations +##lim +attribute +clit +reggie +discomfort +##cards +smoothed +whales +##cene +adler +countered +duffy +disciplinary +widening +recipe +reliance +conducts +goats +gradient +preaching +##shaw +matilda +quasi +striped +meridian +cannabis +cordoba +certificates +##agh +##tering +graffiti +hangs +pilgrims +repeats +##ych +revive +urine +etat +##hawk +fueled +belts +fuzzy +susceptible +##hang +mauritius +salle +sincere +beers +hooks +##cki +arbitration +entrusted +advise +sniffed +seminar +junk +donnell +processors +principality +strapped +celia +mendoza +everton +fortunes +prejudice +starving +reassigned +steamer +##lund +tuck +evenly +foreman +##ffen +dans +375 +envisioned +slit +##xy +baseman +liberia +rosemary +##weed +electrified +periodically +potassium +stride +contexts +sperm +slade +mariners +influx +bianca +subcommittee +##rane +spilling +icao +estuary +##nock +delivers +iphone +##ulata +isa +mira +bohemian +dessert +##sbury +welcoming +proudly +slowing +##chs +musee +ascension +russ +##vian +waits +##psy +africans +exploit +##morphic +gov +eccentric +crab +peck +##ull +entrances +formidable +marketplace +groom +bolted +metabolism +patton +robbins +courier +payload +endure +##ifier +andes +refrigerator +##pr +ornate +##uca +ruthless +illegitimate +masonry +strasbourg +bikes +adobe +##³ +apples +quintet +willingly +niche +bakery +corpses +energetic +##cliffe +##sser +##ards +177 +centimeters +centro +fuscous +cretaceous +rancho +##yde +andrei +telecom +tottenham +oasis +ordination +vulnerability +presiding +corey +cp +penguins +sims +##pis +malawi +piss +##48 +correction +##cked +##ffle +##ryn +countdown +detectives +psychiatrist +psychedelic +dinosaurs +blouse +##get +choi +vowed +##oz +randomly +##pol +49ers +scrub +blanche +bruins +dusseldorf +##using +unwanted +##ums +212 +dominique +elevations +headlights +om +laguna +##oga +1750 +famously +ignorance +shrewsbury +##aine +ajax +breuning +che +confederacy +greco +overhaul +##screen +paz +skirts +disagreement +cruelty +jagged +phoebe +shifter +hovered +viruses +##wes +mandy +##lined +##gc +landlord +squirrel +dashed +##ι +ornamental +gag +wally +grange +literal +spurs +undisclosed +proceeding +yin +##text +billie +orphan +spanned +humidity +indy +weighted +presentations +explosions +lucian +##tary +vaughn +hindus +##anga +##hell +psycho +171 +daytona +protects +efficiently +rematch +sly +tandem +##oya +rebranded +impaired +hee +metropolis +peach +godfrey +diaspora +ethnicity +prosperous +gleaming +dar +grossing +playback +##rden +stripe +pistols +##tain +births +labelled +##cating +172 
+rudy +alba +##onne +aquarium +hostility +##gb +##tase +shudder +sumatra +hardest +lakers +consonant +creeping +demos +homicide +capsule +zeke +liberties +expulsion +pueblo +##comb +trait +transporting +##ddin +##neck +##yna +depart +gregg +mold +ledge +hangar +oldham +playboy +termination +analysts +gmbh +romero +##itic +insist +cradle +filthy +brightness +slash +shootout +deposed +bordering +##truct +isis +microwave +tumbled +sheltered +cathy +werewolves +messy +andersen +convex +clapped +clinched +satire +wasting +edo +vc +rufus +##jak +mont +##etti +poznan +##keeping +restructuring +transverse +##rland +azerbaijani +slovene +gestures +roommate +choking +shear +##quist +vanguard +oblivious +##hiro +disagreed +baptism +##lich +coliseum +##aceae +salvage +societe +cory +locke +relocation +relying +versailles +ahl +swelling +##elo +cheerful +##word +##edes +gin +sarajevo +obstacle +diverted +##nac +messed +thoroughbred +fluttered +utrecht +chewed +acquaintance +assassins +dispatch +mirza +##wart +nike +salzburg +swell +yen +##gee +idle +ligue +samson +##nds +##igh +playful +spawned +##cise +tease +##case +burgundy +##bot +stirring +skeptical +interceptions +marathi +##dies +bedrooms +aroused +pinch +##lik +preferences +tattoos +buster +digitally +projecting +rust +##ital +kitten +priorities +addison +pseudo +##guard +dusk +icons +sermon +##psis +##iba +bt +##lift +##xt +ju +truce +rink +##dah +##wy +defects +psychiatry +offences +calculate +glucose +##iful +##rized +##unda +francaise +##hari +richest +warwickshire +carly +1763 +purity +redemption +lending +##cious +muse +bruises +cerebral +aero +carving +##name +preface +terminology +invade +monty +##int +anarchist +blurred +##iled +rossi +treats +guts +shu +foothills +ballads +undertaking +premise +cecilia +affiliates +blasted +conditional +wilder +minors +drone +rudolph +buffy +swallowing +horton +attested +##hop +rutherford +howell +primetime +livery +penal +##bis +minimize +hydro +wrecked +wrought +palazzo +##gling +cans +vernacular +friedman +nobleman +shale +walnut +danielle +##ection +##tley +sears +##kumar +chords +lend +flipping +streamed +por +dracula +gallons +sacrifices +gamble +orphanage +##iman +mckenzie +##gible +boxers +daly +##balls +##ان +208 +##ific +##rative +##iq +exploited +slated +##uity +circling +hillary +pinched +goldberg +provost +campaigning +lim +piles +ironically +jong +mohan +successors +usaf +##tem +##ught +autobiographical +haute +preserves +##ending +acquitted +comparisons +203 +hydroelectric +gangs +cypriot +torpedoes +rushes +chrome +derive +bumps +instability +fiat +pets +##mbe +silas +dye +reckless +settler +##itation +info +heats +##writing +176 +canonical +maltese +fins +mushroom +stacy +aspen +avid +##kur +##loading +vickers +gaston +hillside +statutes +wilde +gail +kung +sabine +comfortably +motorcycles +##rgo +169 +pneumonia +fetch +##sonic +axel +faintly +parallels +##oop +mclaren +spouse +compton +interdisciplinary +miner +##eni +181 +clamped +##chal +##llah +separates +versa +##mler +scarborough +labrador +##lity +##osing +rutgers +hurdles +como +166 +burt +divers +##100 +wichita +cade +coincided +##erson +bruised +mla +##pper +vineyard +##ili +##brush +notch +mentioning +jase +hearted +kits +doe +##acle +pomerania +##ady +ronan +seizure +pavel +problematic +##zaki +domenico +##ulin +catering +penelope +dependence +parental +emilio +ministerial +atkinson +##bolic +clarkson +chargers +colby +grill +peeked +arises +summon +##aged +fools +##grapher +faculties +qaeda +##vial +garner +refurbished 
+[WordPiece vocabulary file: several thousand added lines, one token per line, running from "##hwa" through "asteroids". The entries are BERT-style lowercase wordpieces; a leading "##" marks a subword that continues the previous piece. Full token list elided.]
+rediscovered +buds +flea +hive +##neas +1737 +classifications +debuts +##eles +olympus +scala +##eurs +##gno +##mute +hummed +sigismund +visuals +wiggled +await +pilasters +clench +sulfate +##ances +bellevue +enigma +trainee +snort +##sw +clouded +denim +##rank +##rder +churning +hartman +lodges +riches +sima +##missible +accountable +socrates +regulates +mueller +##cr +1702 +avoids +solids +himalayas +nutrient +pup +##jevic +squat +fades +nec +##lates +##pina +##rona +##ου +privateer +tequila +##gative +##mpton +apt +hornet +immortals +##dou +asturias +cleansing +dario +##rries +##anta +etymology +servicing +zhejiang +##venor +##nx +horned +erasmus +rayon +relocating +£10 +##bags +escalated +promenade +stubble +2010s +artisans +axial +liquids +mora +sho +yoo +##tsky +bundles +oldies +##nally +notification +bastion +##ths +sparkle +##lved +1728 +leash +pathogen +highs +##hmi +immature +880 +gonzaga +ignatius +mansions +monterrey +sweets +bryson +##loe +polled +regatta +brightest +pei +rosy +squid +hatfield +payroll +addict +meath +cornerback +heaviest +lodging +##mage +capcom +rippled +##sily +barnet +mayhem +ymca +snuggled +rousseau +##cute +blanchard +284 +fragmented +leighton +chromosomes +risking +##md +##strel +##utter +corinne +coyotes +cynical +hiroshi +yeomanry +##ractive +ebook +grading +mandela +plume +agustin +magdalene +##rkin +bea +femme +trafford +##coll +##lun +##tance +52nd +fourier +upton +##mental +camilla +gust +iihf +islamabad +longevity +##kala +feldman +netting +##rization +endeavour +foraging +mfa +orr +##open +greyish +contradiction +graz +##ruff +handicapped +marlene +tweed +oaxaca +spp +campos +miocene +pri +configured +cooks +pluto +cozy +pornographic +##entes +70th +fairness +glided +jonny +lynne +rounding +sired +##emon +##nist +remade +uncover +##mack +complied +lei +newsweek +##jured +##parts +##enting +##pg +293 +finer +guerrillas +athenian +deng +disused +stepmother +accuse +gingerly +seduction +521 +confronting +##walker +##going +gora +nostalgia +sabres +virginity +wrenched +##minated +syndication +wielding +eyre +##56 +##gnon +##igny +behaved +taxpayer +sweeps +##growth +childless +gallant +##ywood +amplified +geraldine +scrape +##ffi +babylonian +fresco +##rdan +##kney +##position +1718 +restricting +tack +fukuoka +osborn +selector +partnering +##dlow +318 +gnu +kia +tak +whitley +gables +##54 +##mania +mri +softness +immersion +##bots +##evsky +1713 +chilling +insignificant +pcs +##uis +elites +lina +purported +supplemental +teaming +##americana +##dding +##inton +proficient +rouen +##nage +##rret +niccolo +selects +##bread +fluffy +1621 +gruff +knotted +mukherjee +polgara +thrash +nicholls +secluded +smoothing +thru +corsica +loaf +whitaker +inquiries +##rrier +##kam +indochina +289 +marlins +myles +peking +##tea +extracts +pastry +superhuman +connacht +vogel +##ditional +##het +##udged +##lash +gloss +quarries +refit +teaser +##alic +##gaon +20s +materialized +sling +camped +pickering +tung +tracker +pursuant +##cide +cranes +soc +##cini +##typical +##viere +anhalt +overboard +workout +chores +fares +orphaned +stains +##logie +fenton +surpassing +joyah +triggers +##itte +grandmaster +##lass +##lists +clapping +fraudulent +ledger +nagasaki +##cor +##nosis +##tsa +eucalyptus +tun +##icio +##rney +##tara +dax +heroism +ina +wrexham +onboard +unsigned +##dates +moshe +galley +winnie +droplets +exiles +praises +watered +noodles +##aia +fein +adi +leland +multicultural +stink +bingo +comets +erskine +modernized +canned +constraint +domestically 
+chemotherapy +featherweight +stifled +##mum +darkly +irresistible +refreshing +hasty +isolate +##oys +kitchener +planners +##wehr +cages +yarn +implant +toulon +elects +childbirth +yue +##lind +##lone +cn +rightful +sportsman +junctions +remodeled +specifies +##rgh +291 +##oons +complimented +##urgent +lister +ot +##logic +bequeathed +cheekbones +fontana +gabby +##dial +amadeus +corrugated +maverick +resented +triangles +##hered +##usly +nazareth +tyrol +1675 +assent +poorer +sectional +aegean +##cous +296 +nylon +ghanaian +##egorical +##weig +cushions +forbid +fusiliers +obstruction +somerville +##scia +dime +earrings +elliptical +leyte +oder +polymers +timmy +atm +midtown +piloted +settles +continual +externally +mayfield +##uh +enrichment +henson +keane +persians +1733 +benji +braden +pep +324 +##efe +contenders +pepsi +valet +##isches +298 +##asse +##earing +goofy +stroll +##amen +authoritarian +occurrences +adversary +ahmedabad +tangent +toppled +dorchester +1672 +modernism +marxism +islamist +charlemagne +exponential +racks +unicode +brunette +mbc +pic +skirmish +##bund +##lad +##powered +##yst +hoisted +messina +shatter +##ctum +jedi +vantage +##music +##neil +clemens +mahmoud +corrupted +authentication +lowry +nils +##washed +omnibus +wounding +jillian +##itors +##opped +serialized +narcotics +handheld +##arm +##plicity +intersecting +stimulating +##onis +crate +fellowships +hemingway +casinos +climatic +fordham +copeland +drip +beatty +leaflets +robber +brothel +madeira +##hedral +sphinx +ultrasound +##vana +valor +forbade +leonid +villas +##aldo +duane +marquez +##cytes +disadvantaged +forearms +kawasaki +reacts +consular +lax +uncles +uphold +##hopper +concepcion +dorsey +lass +##izan +arching +passageway +1708 +researches +tia +internationals +##graphs +##opers +distinguishes +javanese +divert +##uven +plotted +##listic +##rwin +##erik +##tify +affirmative +signifies +validation +##bson +kari +felicity +georgina +zulu +##eros +##rained +##rath +overcoming +##dot +argyll +##rbin +1734 +chiba +ratification +windy +earls +parapet +##marks +hunan +pristine +astrid +punta +##gart +brodie +##kota +##oder +malaga +minerva +rouse +##phonic +bellowed +pagoda +portals +reclamation +##gur +##odies +##⁄₄ +parentheses +quoting +allergic +palette +showcases +benefactor +heartland +nonlinear +##tness +bladed +cheerfully +scans +##ety +##hone +1666 +girlfriends +pedersen +hiram +sous +##liche +##nator +1683 +##nery +##orio +##umen +bobo +primaries +smiley +##cb +unearthed +uniformly +fis +metadata +1635 +ind +##oted +recoil +##titles +##tura +##ια +406 +hilbert +jamestown +mcmillan +tulane +seychelles +##frid +antics +coli +fated +stucco +##grants +1654 +bulky +accolades +arrays +caledonian +carnage +optimism +puebla +##tative +##cave +enforcing +rotherham +seo +dunlop +aeronautics +chimed +incline +zoning +archduke +hellenistic +##oses +##sions +candi +thong +##ople +magnate +rustic +##rsk +projective +slant +##offs +danes +hollis +vocalists +##ammed +congenital +contend +gesellschaft +##ocating +##pressive +douglass +quieter +##cm +##kshi +howled +salim +spontaneously +townsville +buena +southport +##bold +kato +1638 +faerie +stiffly +##vus +##rled +297 +flawless +realising +taboo +##7th +bytes +straightening +356 +jena +##hid +##rmin +cartwright +berber +bertram +soloists +411 +noses +417 +coping +fission +hardin +inca +##cen +1717 +mobilized +vhf +##raf +biscuits +curate +##85 +##anial +331 +gaunt +neighbourhoods +1540 +##abas +blanca +bypassed +sockets +behold +coincidentally +##bane 
+nara +shave +splinter +terrific +##arion +##erian +commonplace +juris +redwood +waistband +boxed +caitlin +fingerprints +jennie +naturalized +##ired +balfour +craters +jody +bungalow +hugely +quilt +glitter +pigeons +undertaker +bulging +constrained +goo +##sil +##akh +assimilation +reworked +##person +persuasion +##pants +felicia +##cliff +##ulent +1732 +explodes +##dun +##inium +##zic +lyman +vulture +hog +overlook +begs +northwards +ow +spoil +##urer +fatima +favorably +accumulate +sargent +sorority +corresponded +dispersal +kochi +toned +##imi +##lita +internacional +newfound +##agger +##lynn +##rigue +booths +peanuts +##eborg +medicare +muriel +nur +##uram +crates +millennia +pajamas +worsened +##breakers +jimi +vanuatu +yawned +##udeau +carousel +##hony +hurdle +##ccus +##mounted +##pod +rv +##eche +airship +ambiguity +compulsion +recapture +##claiming +arthritis +##osomal +1667 +asserting +ngc +sniffing +dade +discontent +glendale +ported +##amina +defamation +rammed +##scent +fling +livingstone +##fleet +875 +##ppy +apocalyptic +comrade +lcd +##lowe +cessna +eine +persecuted +subsistence +demi +hoop +reliefs +710 +coptic +progressing +stemmed +perpetrators +1665 +priestess +##nio +dobson +ebony +rooster +itf +tortricidae +##bbon +##jian +cleanup +##jean +##øy +1721 +eighties +taxonomic +holiness +##hearted +##spar +antilles +showcasing +stabilized +##nb +gia +mascara +michelangelo +dawned +##uria +##vinsky +extinguished +fitz +grotesque +£100 +##fera +##loid +##mous +barges +neue +throbbed +cipher +johnnie +##a1 +##mpt +outburst +##swick +spearheaded +administrations +c1 +heartbreak +pixels +pleasantly +##enay +lombardy +plush +##nsed +bobbie +##hly +reapers +tremor +xiang +minogue +substantive +hitch +barak +##wyl +kwan +##encia +910 +obscene +elegance +indus +surfer +bribery +conserve +##hyllum +##masters +horatio +##fat +apes +rebound +psychotic +##pour +iteration +##mium +##vani +botanic +horribly +antiques +dispose +paxton +##hli +##wg +timeless +1704 +disregard +engraver +hounds +##bau +##version +looted +uno +facilitates +groans +masjid +rutland +antibody +disqualification +decatur +footballers +quake +slacks +48th +rein +scribe +stabilize +commits +exemplary +tho +##hort +##chison +pantry +traversed +##hiti +disrepair +identifiable +vibrated +baccalaureate +##nnis +csa +interviewing +##iensis +##raße +greaves +wealthiest +343 +classed +jogged +£5 +##58 +##atal +illuminating +knicks +respecting +##uno +scrubbed +##iji +##dles +kruger +moods +growls +raider +silvia +chefs +kam +vr +cree +percival +##terol +gunter +counterattack +defiant +henan +ze +##rasia +##riety +equivalence +submissions +##fra +##thor +bautista +mechanically +##heater +cornice +herbal +templar +##mering +outputs +ruining +ligand +renumbered +extravagant +mika +blockbuster +eta +insurrection +##ilia +darkening +ferocious +pianos +strife +kinship +##aer +melee +##anor +##iste +##may +##oue +decidedly +weep +##jad +##missive +##ppel +354 +puget +unease +##gnant +1629 +hammering +kassel +ob +wessex +##lga +bromwich +egan +paranoia +utilization +##atable +##idad +contradictory +provoke +##ols +##ouring +##tangled +knesset +##very +##lette +plumbing +##sden +##¹ +greensboro +occult +sniff +338 +zev +beaming +gamer +haggard +mahal +##olt +##pins +mendes +utmost +briefing +gunnery +##gut +##pher +##zh +##rok +1679 +khalifa +sonya +##boot +principals +urbana +wiring +##liffe +##minating +##rrado +dahl +nyu +skepticism +np +townspeople +ithaca +lobster +somethin +##fur +##arina +##−1 +freighter +zimmerman +biceps 
+contractual +##herton +amend +hurrying +subconscious +##anal +336 +meng +clermont +spawning +##eia +##lub +dignitaries +impetus +snacks +spotting +twigs +##bilis +##cz +##ouk +libertadores +nic +skylar +##aina +##firm +gustave +asean +##anum +dieter +legislatures +flirt +bromley +trolls +umar +##bbies +##tyle +blah +parc +bridgeport +crank +negligence +##nction +46th +constantin +molded +bandages +seriousness +00pm +siegel +carpets +compartments +upbeat +statehood +##dner +##edging +marko +730 +platt +##hane +paving +##iy +1738 +abbess +impatience +limousine +nbl +##talk +441 +lucille +mojo +nightfall +robbers +##nais +karel +brisk +calves +replicate +ascribed +telescopes +##olf +intimidated +##reen +ballast +specialization +##sit +aerodynamic +caliphate +rainer +visionary +##arded +epsilon +##aday +##onte +aggregation +auditory +boosted +reunification +kathmandu +loco +robyn +402 +acknowledges +appointing +humanoid +newell +redeveloped +restraints +##tained +barbarians +chopper +1609 +italiana +##lez +##lho +investigates +wrestlemania +##anies +##bib +690 +##falls +creaked +dragoons +gravely +minions +stupidity +volley +##harat +##week +musik +##eries +##uously +fungal +massimo +semantics +malvern +##ahl +##pee +discourage +embryo +imperialism +1910s +profoundly +##ddled +jiangsu +sparkled +stat +##holz +sweatshirt +tobin +##iction +sneered +##cheon +##oit +brit +causal +smyth +##neuve +diffuse +perrin +silvio +##ipes +##recht +detonated +iqbal +selma +##nism +##zumi +roasted +##riders +tay +##ados +##mament +##mut +##rud +840 +completes +nipples +cfa +flavour +hirsch +##laus +calderon +sneakers +moravian +##ksha +1622 +rq +294 +##imeters +bodo +##isance +##pre +##ronia +anatomical +excerpt +##lke +dh +kunst +##tablished +##scoe +biomass +panted +unharmed +gael +housemates +montpellier +##59 +coa +rodents +tonic +hickory +singleton +##taro +451 +1719 +aldo +breaststroke +dempsey +och +rocco +##cuit +merton +dissemination +midsummer +serials +##idi +haji +polynomials +##rdon +gs +enoch +prematurely +shutter +taunton +£3 +##grating +##inates +archangel +harassed +##asco +326 +archway +dazzling +##ecin +1736 +sumo +wat +##kovich +1086 +honneur +##ently +##nostic +##ttal +##idon +1605 +403 +1716 +blogger +rents +##gnan +hires +##ikh +##dant +howie +##rons +handler +retracted +shocks +1632 +arun +duluth +kepler +trumpeter +##lary +peeking +seasoned +trooper +##mara +laszlo +##iciencies +##rti +heterosexual +##inatory +##ssion +indira +jogging +##inga +##lism +beit +dissatisfaction +malice +##ately +nedra +peeling +##rgeon +47th +stadiums +475 +vertigo +##ains +iced +restroom +##plify +##tub +illustrating +pear +##chner +##sibility +inorganic +rappers +receipts +watery +##kura +lucinda +##oulos +reintroduced +##8th +##tched +gracefully +saxons +nutritional +wastewater +rained +favourites +bedrock +fisted +hallways +likeness +upscale +##lateral +1580 +blinds +prequel +##pps +##tama +deter +humiliating +restraining +tn +vents +1659 +laundering +recess +rosary +tractors +coulter +federer +##ifiers +##plin +persistence +##quitable +geschichte +pendulum +quakers +##beam +bassett +pictorial +buffet +koln +##sitor +drills +reciprocal +shooters +##57 +##cton +##tees +converge +pip +dmitri +donnelly +yamamoto +aqua +azores +demographics +hypnotic +spitfire +suspend +wryly +roderick +##rran +sebastien +##asurable +mavericks +##fles +##200 +himalayan +prodigy +##iance +transvaal +demonstrators +handcuffs +dodged +mcnamara +sublime +1726 +crazed +##efined +##till +ivo +pondered +reconciled +shrill +sava 
+##duk +bal +cad +heresy +jaipur +goran +##nished +341 +lux +shelly +whitehall +##hre +israelis +peacekeeping +##wled +1703 +demetrius +ousted +##arians +##zos +beale +anwar +backstroke +raged +shrinking +cremated +##yck +benign +towing +wadi +darmstadt +landfill +parana +soothe +colleen +sidewalks +mayfair +tumble +hepatitis +ferrer +superstructure +##gingly +##urse +##wee +anthropological +translators +##mies +closeness +hooves +##pw +mondays +##roll +##vita +landscaping +##urized +purification +sock +thorns +thwarted +jalan +tiberius +##taka +saline +##rito +confidently +khyber +sculptors +##ij +brahms +hammersmith +inspectors +battista +fivb +fragmentation +hackney +##uls +arresting +exercising +antoinette +bedfordshire +##zily +dyed +##hema +1656 +racetrack +variability +##tique +1655 +austrians +deteriorating +madman +theorists +aix +lehman +weathered +1731 +decreed +eruptions +1729 +flaw +quinlan +sorbonne +flutes +nunez +1711 +adored +downwards +fable +rasped +1712 +moritz +mouthful +renegade +shivers +stunts +dysfunction +restrain +translit +327 +pancakes +##avio +##cision +##tray +351 +vial +##lden +bain +##maid +##oxide +chihuahua +malacca +vimes +##rba +##rnier +1664 +donnie +plaques +##ually +337 +bangs +floppy +huntsville +loretta +nikolay +##otte +eater +handgun +ubiquitous +##hett +eras +zodiac +1634 +##omorphic +1820s +##zog +cochran +##bula +##lithic +warring +##rada +dalai +excused +blazers +mcconnell +reeling +bot +este +##abi +geese +hoax +taxon +##bla +guitarists +##icon +condemning +hunts +inversion +moffat +taekwondo +##lvis +1624 +stammered +##rest +##rzy +sousa +fundraiser +marylebone +navigable +uptown +cabbage +daniela +salman +shitty +whimper +##kian +##utive +programmers +protections +rm +##rmi +##rued +forceful +##enes +fuss +##tao +##wash +brat +oppressive +reykjavik +spartak +ticking +##inkles +##kiewicz +adolph +horst +maui +protege +straighten +cpc +landau +concourse +clements +resultant +##ando +imaginative +joo +reactivated +##rem +##ffled +##uising +consultative +##guide +flop +kaitlyn +mergers +parenting +somber +##vron +supervise +vidhan +##imum +courtship +exemplified +harmonies +medallist +refining +##rrow +##ка +amara +##hum +780 +goalscorer +sited +overshadowed +rohan +displeasure +secretive +multiplied +osman +##orth +engravings +padre +##kali +##veda +miniatures +mis +##yala +clap +pali +rook +##cana +1692 +57th +antennae +astro +oskar +1628 +bulldog +crotch +hackett +yucatan +##sure +amplifiers +brno +ferrara +migrating +##gree +thanking +turing +##eza +mccann +ting +andersson +onslaught +gaines +ganga +incense +standardization +##mation +sentai +scuba +stuffing +turquoise +waivers +alloys +##vitt +regaining +vaults +##clops +##gizing +digger +furry +memorabilia +probing +##iad +payton +rec +deutschland +filippo +opaque +seamen +zenith +afrikaans +##filtration +disciplined +inspirational +##merie +banco +confuse +grafton +tod +##dgets +championed +simi +anomaly +biplane +##ceptive +electrode +##para +1697 +cleavage +crossbow +swirl +informant +##lars +##osta +afi +bonfire +spec +##oux +lakeside +slump +##culus +##lais +##qvist +##rrigan +1016 +facades +borg +inwardly +cervical +xl +pointedly +050 +stabilization +##odon +chests +1699 +hacked +ctv +orthogonal +suzy +##lastic +gaulle +jacobite +rearview +##cam +##erted +ashby +##drik +##igate +##mise +##zbek +affectionately +canine +disperse +latham +##istles +##ivar +spielberg +##orin +##idium +ezekiel +cid +##sg +durga +middletown +##cina +customized +frontiers +harden +##etano +##zzy +1604 
+bolsheviks +##66 +coloration +yoko +##bedo +briefs +slabs +debra +liquidation +plumage +##oin +blossoms +dementia +subsidy +1611 +proctor +relational +jerseys +parochial +ter +##ici +esa +peshawar +cavalier +loren +cpi +idiots +shamrock +1646 +dutton +malabar +mustache +##endez +##ocytes +referencing +terminates +marche +yarmouth +##sop +acton +mated +seton +subtly +baptised +beige +extremes +jolted +kristina +telecast +##actic +safeguard +waldo +##baldi +##bular +endeavors +sloppy +subterranean +##ensburg +##itung +delicately +pigment +tq +##scu +1626 +##ound +collisions +coveted +herds +##personal +##meister +##nberger +chopra +##ricting +abnormalities +defective +galician +lucie +##dilly +alligator +likened +##genase +burundi +clears +complexion +derelict +deafening +diablo +fingered +champaign +dogg +enlist +isotope +labeling +mrna +##erre +brilliance +marvelous +##ayo +1652 +crawley +ether +footed +dwellers +deserts +hamish +rubs +warlock +skimmed +##lizer +870 +buick +embark +heraldic +irregularities +##ajan +kiara +##kulam +##ieg +antigen +kowalski +##lge +oakley +visitation +##mbit +vt +##suit +1570 +murderers +##miento +##rites +chimneys +##sling +condemn +custer +exchequer +havre +##ghi +fluctuations +##rations +dfb +hendricks +vaccines +##tarian +nietzsche +biking +juicy +##duced +brooding +scrolling +selangor +##ragan +352 +annum +boomed +seminole +sugarcane +##dna +departmental +dismissing +innsbruck +arteries +ashok +batavia +daze +kun +overtook +##rga +##tlan +beheaded +gaddafi +holm +electronically +faulty +galilee +fractures +kobayashi +##lized +gunmen +magma +aramaic +mala +eastenders +inference +messengers +bf +##qu +407 +bathrooms +##vere +1658 +flashbacks +ideally +misunderstood +##jali +##weather +mendez +##grounds +505 +uncanny +##iii +1709 +friendships +##nbc +sacrament +accommodated +reiterated +logistical +pebbles +thumped +##escence +administering +decrees +drafts +##flight +##cased +##tula +futuristic +picket +intimidation +winthrop +##fahan +interfered +339 +afar +francoise +morally +uta +cochin +croft +dwarfs +##bruck +##dents +##nami +biker +##hner +##meral +nano +##isen +##ometric +##pres +##ан +brightened +meek +parcels +securely +gunners +##jhl +##zko +agile +hysteria +##lten +##rcus +bukit +champs +chevy +cuckoo +leith +sadler +theologians +welded +##section +1663 +jj +plurality +xander +##rooms +##formed +shredded +temps +intimately +pau +tormented +##lok +##stellar +1618 +charred +ems +essen +##mmel +alarms +spraying +ascot +blooms +twinkle +##abia +##apes +internment +obsidian +##chaft +snoop +##dav +##ooping +malibu +##tension +quiver +##itia +hays +mcintosh +travers +walsall +##ffie +1623 +beverley +schwarz +plunging +structurally +m3 +rosenthal +vikram +##tsk +770 +ghz +##onda +##tiv +chalmers +groningen +pew +reckon +unicef +##rvis +55th +##gni +1651 +sulawesi +avila +cai +metaphysical +screwing +turbulence +##mberg +augusto +samba +56th +baffled +momentary +toxin +##urian +##wani +aachen +condoms +dali +steppe +##3d +##app +##oed +##year +adolescence +dauphin +electrically +inaccessible +microscopy +nikita +##ega +atv +##cel +##enter +##oles +##oteric +##ы +accountants +punishments +wrongly +bribes +adventurous +clinch +flinders +southland +##hem +##kata +gough +##ciency +lads +soared +##ה +undergoes +deformation +outlawed +rubbish +##arus +##mussen +##nidae +##rzburg +arcs +##ingdon +##tituted +1695 +wheelbase +wheeling +bombardier +campground +zebra +##lices +##oj +##bain +lullaby +##ecure +donetsk +wylie +grenada +##arding +##ης +squinting 
+eireann +opposes +##andra +maximal +runes +##broken +##cuting +##iface +##ror +##rosis +additive +britney +adultery +triggering +##drome +detrimental +aarhus +containment +jc +swapped +vichy +##ioms +madly +##oric +##rag +brant +##ckey +##trix +1560 +1612 +broughton +rustling +##stems +##uder +asbestos +mentoring +##nivorous +finley +leaps +##isan +apical +pry +slits +substitutes +##dict +intuitive +fantasia +insistent +unreasonable +##igen +##vna +domed +hannover +margot +ponder +##zziness +impromptu +jian +lc +rampage +stemming +##eft +andrey +gerais +whichever +amnesia +appropriated +anzac +clicks +modifying +ultimatum +cambrian +maids +verve +yellowstone +##mbs +conservatoire +##scribe +adherence +dinners +spectra +imperfect +mysteriously +sidekick +tatar +tuba +##aks +##ifolia +distrust +##athan +##zle +c2 +ronin +zac +##pse +celaena +instrumentalist +scents +skopje +##mbling +comical +compensated +vidal +condor +intersect +jingle +wavelengths +##urrent +mcqueen +##izzly +carp +weasel +422 +kanye +militias +postdoctoral +eugen +gunslinger +##ɛ +faux +hospice +##for +appalled +derivation +dwarves +##elis +dilapidated +##folk +astoria +philology +##lwyn +##otho +##saka +inducing +philanthropy +##bf +##itative +geek +markedly +sql +##yce +bessie +indices +rn +##flict +495 +frowns +resolving +weightlifting +tugs +cleric +contentious +1653 +mania +rms +##miya +##reate +##ruck +##tucket +bien +eels +marek +##ayton +##cence +discreet +unofficially +##ife +leaks +##bber +1705 +332 +dung +compressor +hillsborough +pandit +shillings +distal +##skin +381 +##tat +##you +nosed +##nir +mangrove +undeveloped +##idia +textures +##inho +##500 +##rise +ae +irritating +nay +amazingly +bancroft +apologetic +compassionate +kata +symphonies +##lovic +airspace +##lch +930 +gifford +precautions +fulfillment +sevilla +vulgar +martinique +##urities +looting +piccolo +tidy +##dermott +quadrant +armchair +incomes +mathematicians +stampede +nilsson +##inking +##scan +foo +quarterfinal +##ostal +shang +shouldered +squirrels +##owe +344 +vinegar +##bner +##rchy +##systems +delaying +##trics +ars +dwyer +rhapsody +sponsoring +##gration +bipolar +cinder +starters +##olio +##urst +421 +signage +##nty +aground +figurative +mons +acquaintances +duets +erroneously +soyuz +elliptic +recreated +##cultural +##quette +##ssed +##tma +##zcz +moderator +scares +##itaire +##stones +##udence +juniper +sighting +##just +##nsen +britten +calabria +ry +bop +cramer +forsyth +stillness +##л +airmen +gathers +unfit +##umber +##upt +taunting +##rip +seeker +streamlined +##bution +holster +schumann +tread +vox +##gano +##onzo +strive +dil +reforming +covent +newbury +predicting +##orro +decorate +tre +##puted +andover +ie +asahi +dept +dunkirk +gills +##tori +buren +huskies +##stis +##stov +abstracts +bets +loosen +##opa +1682 +yearning +##glio +##sir +berman +effortlessly +enamel +napoli +persist +##peration +##uez +attache +elisa +b1 +invitations +##kic +accelerating +reindeer +boardwalk +clutches +nelly +polka +starbucks +##kei +adamant +huey +lough +unbroken +adventurer +embroidery +inspecting +stanza +##ducted +naia +taluka +##pone +##roids +chases +deprivation +florian +##jing +##ppet +earthly +##lib +##ssee +colossal +foreigner +vet +freaks +patrice +rosewood +triassic +upstate +##pkins +dominates +ata +chants +ks +vo +##400 +##bley +##raya +##rmed +555 +agra +infiltrate +##ailing +##ilation +##tzer +##uppe +##werk +binoculars +enthusiast +fujian +squeak +##avs +abolitionist +almeida +boredom +hampstead +marsden +rations +##ands 
+inflated +334 +bonuses +rosalie +patna +##rco +329 +detachments +penitentiary +54th +flourishing +woolf +##dion +##etched +papyrus +##lster +##nsor +##toy +bobbed +dismounted +endelle +inhuman +motorola +tbs +wince +wreath +##ticus +hideout +inspections +sanjay +disgrace +infused +pudding +stalks +##urbed +arsenic +leases +##hyl +##rrard +collarbone +##waite +##wil +dowry +##bant +##edance +genealogical +nitrate +salamanca +scandals +thyroid +necessitated +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##` +##{ +##| +##} +##~ +##¡ +##¢ +##£ +##¤ +##¥ +##¦ +##§ +##¨ +##© +##ª +##« +##¬ +##® +##± +##´ +##µ +##¶ +##· +##º +##» +##¼ +##¾ +##¿ +##æ +##ð +##÷ +##þ +##đ +##ħ +##ŋ +##œ +##ƒ +##ɐ +##ɑ +##ɒ +##ɔ +##ɕ +##ə +##ɡ +##ɣ +##ɨ +##ɪ +##ɫ +##ɬ +##ɯ +##ɲ +##ɴ +##ɹ +##ɾ +##ʀ +##ʁ +##ʂ +##ʃ +##ʉ +##ʊ +##ʋ +##ʌ +##ʎ +##ʐ +##ʑ +##ʒ +##ʔ +##ʰ +##ʲ +##ʳ +##ʷ +##ʸ +##ʻ +##ʼ +##ʾ +##ʿ +##ˈ +##ˡ +##ˢ +##ˣ +##ˤ +##β +##γ +##δ +##ε +##ζ +##θ +##κ +##λ +##μ +##ξ +##ο +##π +##ρ +##σ +##τ +##υ +##φ +##χ +##ψ +##ω +##б +##г +##д +##ж +##з +##м +##п +##с +##у +##ф +##х +##ц +##ч +##ш +##щ +##ъ +##э +##ю +##ђ +##є +##і +##ј +##љ +##њ +##ћ +##ӏ +##ա +##բ +##գ +##դ +##ե +##թ +##ի +##լ +##կ +##հ +##մ +##յ +##ն +##ո +##պ +##ս +##վ +##տ +##ր +##ւ +##ք +##־ +##א +##ב +##ג +##ד +##ו +##ז +##ח +##ט +##י +##ך +##כ +##ל +##ם +##מ +##ן +##נ +##ס +##ע +##ף +##פ +##ץ +##צ +##ק +##ר +##ש +##ת +##، +##ء +##ب +##ت +##ث +##ج +##ح +##خ +##ذ +##ز +##س +##ش +##ص +##ض +##ط +##ظ +##ع +##غ +##ـ +##ف +##ق +##ك +##و +##ى +##ٹ +##پ +##چ +##ک +##گ +##ں +##ھ +##ہ +##ے +##अ +##आ +##उ +##ए +##क +##ख +##ग +##च +##ज +##ट +##ड +##ण +##त +##थ +##द +##ध +##न +##प +##ब +##भ +##म +##य +##र +##ल +##व +##श +##ष +##स +##ह +##ा +##ि +##ी +##ो +##। +##॥ +##ং +##অ +##আ +##ই +##উ +##এ +##ও +##ক +##খ +##গ +##চ +##ছ +##জ +##ট +##ড +##ণ +##ত +##থ +##দ +##ধ +##ন +##প +##ব +##ভ +##ম +##য +##র +##ল +##শ +##ষ +##স +##হ +##া +##ি +##ী +##ে +##க +##ச +##ட +##த +##ந +##ன +##ப +##ம +##ய +##ர +##ல +##ள +##வ +##ா +##ி +##ு +##ே +##ை +##ನ +##ರ +##ಾ +##ක +##ය +##ර +##ල +##ව +##ා +##ก +##ง +##ต +##ท +##น +##พ +##ม +##ย +##ร +##ล +##ว +##ส +##อ +##า +##เ +##་ +##། +##ག +##ང +##ད +##ན +##པ +##བ +##མ +##འ +##ར +##ལ +##ས +##မ +##ა +##ბ +##გ +##დ +##ე +##ვ +##თ +##ი +##კ +##ლ +##მ +##ნ +##ო +##რ +##ს +##ტ +##უ +##ᄀ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄉ +##ᄊ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅡ +##ᅢ +##ᅥ +##ᅦ +##ᅧ +##ᅩ +##ᅪ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᅵ +##ᆨ +##ᆫ +##ᆯ +##ᆷ +##ᆸ +##ᆼ +##ᴬ +##ᴮ +##ᴰ +##ᴵ +##ᴺ +##ᵀ +##ᵃ +##ᵇ +##ᵈ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵖ +##ᵗ +##ᵘ +##ᵣ +##ᵤ +##ᵥ +##ᶜ +##ᶠ +##‐ +##‑ +##‒ +##– +##— +##― +##‖ +##‘ +##’ +##‚ +##“ +##” +##„ +##† +##‡ +##• +##… +##‰ +##′ +##″ +##› +##‿ +##⁄ +##⁰ +##ⁱ +##⁴ +##⁵ +##⁶ +##⁷ +##⁸ +##⁹ +##⁻ +##ⁿ +##₅ +##₆ +##₇ +##₈ +##₉ +##₊ +##₍ +##₎ +##ₐ +##ₑ +##ₒ +##ₓ +##ₕ +##ₖ +##ₗ +##ₘ +##ₚ +##ₛ +##ₜ +##₤ +##₩ +##€ +##₱ +##₹ +##ℓ +##№ +##ℝ +##™ +##⅓ +##⅔ +##← +##↑ +##→ +##↓ +##↔ +##↦ +##⇄ +##⇌ +##⇒ +##∂ +##∅ +##∆ +##∇ +##∈ +##∗ +##∘ +##√ +##∞ +##∧ +##∨ +##∩ +##∪ +##≈ +##≡ +##≤ +##≥ +##⊂ +##⊆ +##⊕ +##⊗ +##⋅ +##─ +##│ +##■ +##▪ +##● +##★ +##☆ +##☉ +##♠ +##♣ +##♥ +##♦ +##♯ +##⟨ +##⟩ +##ⱼ +##⺩ +##⺼ +##⽥ +##、 +##。 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##〜 +##あ +##い +##う +##え +##お +##か +##き +##く +##け +##こ +##さ +##し +##す +##せ +##そ +##た +##ち +##っ +##つ +##て +##と +##な +##に +##ぬ +##ね +##の +##は +##ひ +##ふ +##へ +##ほ +##ま +##み +##む +##め +##も +##や +##ゆ +##よ +##ら +##り +##る +##れ +##ろ +##を +##ん +##ァ +##ア +##ィ +##イ +##ウ +##ェ +##エ +##オ +##カ +##キ +##ク +##ケ 
+##コ +##サ +##シ +##ス +##セ +##タ +##チ +##ッ +##ツ +##テ +##ト +##ナ +##ニ +##ノ +##ハ +##ヒ +##フ +##ヘ +##ホ +##マ +##ミ +##ム +##メ +##モ +##ャ +##ュ +##ョ +##ラ +##リ +##ル +##レ +##ロ +##ワ +##ン +##・ +##ー +##一 +##三 +##上 +##下 +##不 +##世 +##中 +##主 +##久 +##之 +##也 +##事 +##二 +##五 +##井 +##京 +##人 +##亻 +##仁 +##介 +##代 +##仮 +##伊 +##会 +##佐 +##侍 +##保 +##信 +##健 +##元 +##光 +##八 +##公 +##内 +##出 +##分 +##前 +##劉 +##力 +##加 +##勝 +##北 +##区 +##十 +##千 +##南 +##博 +##原 +##口 +##古 +##史 +##司 +##合 +##吉 +##同 +##名 +##和 +##囗 +##四 +##国 +##國 +##土 +##地 +##坂 +##城 +##堂 +##場 +##士 +##夏 +##外 +##大 +##天 +##太 +##夫 +##奈 +##女 +##子 +##学 +##宀 +##宇 +##安 +##宗 +##定 +##宣 +##宮 +##家 +##宿 +##寺 +##將 +##小 +##尚 +##山 +##岡 +##島 +##崎 +##川 +##州 +##巿 +##帝 +##平 +##年 +##幸 +##广 +##弘 +##張 +##彳 +##後 +##御 +##德 +##心 +##忄 +##志 +##忠 +##愛 +##成 +##我 +##戦 +##戸 +##手 +##扌 +##政 +##文 +##新 +##方 +##日 +##明 +##星 +##春 +##昭 +##智 +##曲 +##書 +##月 +##有 +##朝 +##木 +##本 +##李 +##村 +##東 +##松 +##林 +##森 +##楊 +##樹 +##橋 +##歌 +##止 +##正 +##武 +##比 +##氏 +##民 +##水 +##氵 +##氷 +##永 +##江 +##沢 +##河 +##治 +##法 +##海 +##清 +##漢 +##瀬 +##火 +##版 +##犬 +##王 +##生 +##田 +##男 +##疒 +##発 +##白 +##的 +##皇 +##目 +##相 +##省 +##真 +##石 +##示 +##社 +##神 +##福 +##禾 +##秀 +##秋 +##空 +##立 +##章 +##竹 +##糹 +##美 +##義 +##耳 +##良 +##艹 +##花 +##英 +##華 +##葉 +##藤 +##行 +##街 +##西 +##見 +##訁 +##語 +##谷 +##貝 +##貴 +##車 +##軍 +##辶 +##道 +##郎 +##郡 +##部 +##都 +##里 +##野 +##金 +##鈴 +##镇 +##長 +##門 +##間 +##阝 +##阿 +##陳 +##陽 +##雄 +##青 +##面 +##風 +##食 +##香 +##馬 +##高 +##龍 +##龸 +##fi +##fl +##! +##( +##) +##, +##- +##. +##/ +##: +##? +##~ diff --git a/anet_clip/backup/pdvc/modules/cross-base/cross_config.json b/anet_clip/backup/pdvc/modules/cross-base/cross_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a4807695d56a3aea97a55a9db97ba753e960748 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/cross-base/cross_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 1024, + "num_attention_heads": 12, + "num_hidden_layers": 2, + "vocab_size": 768 +} \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/decoder-base/decoder_config.json b/anet_clip/backup/pdvc/modules/decoder-base/decoder_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91c46b63eba081afb28085a6d53f390ada5a5cfe --- /dev/null +++ b/anet_clip/backup/pdvc/modules/decoder-base/decoder_config.json @@ -0,0 +1,14 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522, + "num_decoder_layers": 1, + "max_target_embeddings": 512 +} diff --git a/anet_clip/backup/pdvc/modules/file_utils.py b/anet_clip/backup/pdvc/modules/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..43fa8ca87e20ee5333dd84a09795a743bbf3f183 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/file_utils.py @@ -0,0 +1,239 @@ +""" +Utilities for working with the local dataset cache. +This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp +Copyright by the AllenNLP authors. 
+""" + +import os +import logging +import shutil +import tempfile +import json +from urllib.parse import urlparse +from pathlib import Path +from typing import Optional, Tuple, Union, IO, Callable, Set +from hashlib import sha256 +from functools import wraps + +from tqdm import tqdm + +import boto3 +from botocore.exceptions import ClientError +import requests + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + + +def filename_to_url(filename: str, cache_dir: Union[str, Path] = None) -> Tuple[str, str]: + """ + Return the url and etag (which may be ``None``) stored for `filename`. + Raise ``FileNotFoundError`` if `filename` or its stored metadata do not exist. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + cache_path = os.path.join(cache_dir, filename) + if not os.path.exists(cache_path): + raise FileNotFoundError("file {} not found".format(cache_path)) + + meta_path = cache_path + '.json' + if not os.path.exists(meta_path): + raise FileNotFoundError("file {} not found".format(meta_path)) + + with open(meta_path) as meta_file: + metadata = json.load(meta_file) + url = metadata['url'] + etag = metadata['etag'] + + return url, etag + + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. 
+ """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. + with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + + +def read_set_from_file(filename: str) -> Set[str]: + ''' + Extract a de-duped collection (set) of text from a file. + Expected file format is one item per line. 
+    '''
+    collection = set()
+    with open(filename, 'r', encoding='utf-8') as file_:
+        for line in file_:
+            collection.add(line.rstrip())
+    return collection
+
+
+def get_file_extension(path: str, dot=True, lower: bool = True):
+    ext = os.path.splitext(path)[1]
+    ext = ext if dot else ext[1:]
+    return ext.lower() if lower else ext
diff --git a/anet_clip/backup/pdvc/modules/modeling.py b/anet_clip/backup/pdvc/modules/modeling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9551b488c16d04fad65dcdaeba7d73d7740f2902
--- /dev/null
+++ b/anet_clip/backup/pdvc/modules/modeling.py
@@ -0,0 +1,429 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn import CrossEntropyLoss, MSELoss
+
+from pdvc.modules.until_module import PreTrainedModel, LayerNorm, CrossEn, MILNCELoss, MaxMarginRankingLoss
+from pdvc.modules.module_bert import BertModel, BertConfig, BertOnlyMLMHead
+from pdvc.modules.module_visual import VisualModel, VisualConfig, VisualOnlyMLMHead
+from pdvc.modules.module_cross import CrossModel, CrossConfig
+from pdvc.modules.module_decoder import DecoderModel, DecoderConfig
+
+logger = logging.getLogger(__name__)
+
+
+class UniVLPreTrainedModel(PreTrainedModel, nn.Module):
+    """ An abstract class to handle weights initialization and
+        a simple interface for downloading and loading pretrained models.
+ """ + def __init__(self, bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs): + # utilize bert config as base config + super(UniVLPreTrainedModel, self).__init__(bert_config) + self.bert_config = bert_config + self.visual_config = visual_config + self.cross_config = cross_config + self.decoder_config = decoder_config + + self.bert = None + self.visual = None + self.cross = None + self.decoder = None + + @classmethod + def from_pretrained(cls, pretrained_bert_name, visual_model_name, cross_model_name, decoder_model_name, + state_dict=None, cache_dir=None, type_vocab_size=2, *inputs, **kwargs): + + task_config = None + if "task_config" in kwargs.keys(): + task_config = kwargs["task_config"] + if not hasattr(task_config, "local_rank"): + task_config.__dict__["local_rank"] = 0 + elif task_config.local_rank == -1: + task_config.local_rank = 0 + print(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config) + bert_config, state_dict = BertConfig.get_config(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config=task_config) + visual_config, _ = VisualConfig.get_config(visual_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + cross_config, _ = CrossConfig.get_config(cross_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + decoder_config, _ = DecoderConfig.get_config(decoder_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + + model = cls(bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs) + + assert model.bert is not None + assert model.visual is not None + + if state_dict is not None: + model = cls.init_preweight(model, state_dict, task_config=task_config) + + return model + +class NormalizeVideo(nn.Module): + def __init__(self, task_config): + super(NormalizeVideo, self).__init__() + self.visual_norm2d = LayerNorm(task_config.video_dim) + + def forward(self, video): + video = torch.as_tensor(video).float() + video = video.view(-1, video.shape[-2], video.shape[-1]) + video = self.visual_norm2d(video) + return video + +def show_log(task_config, info): + if task_config is None or task_config.local_rank == 0: + logger.warning(info) + +def update_attr(target_name, target_config, target_attr_name, source_config, source_attr_name, default_value=None): + if hasattr(source_config, source_attr_name): + if default_value is None or getattr(source_config, source_attr_name) != default_value: + setattr(target_config, target_attr_name, getattr(source_config, source_attr_name)) + show_log(source_config, "Set {}.{}: {}.".format(target_name, + target_attr_name, getattr(target_config, target_attr_name))) + return target_config + +def check_attr(target_name, task_config): + return hasattr(task_config, target_name) and task_config.__dict__[target_name] + +class UniVL(UniVLPreTrainedModel): + def __init__(self, bert_config, visual_config, cross_config, decoder_config, task_config): + super(UniVL, self).__init__(bert_config, visual_config, cross_config, decoder_config) + self.task_config = task_config + self.ignore_video_index = -1 + + assert self.task_config.max_words <= bert_config.max_position_embeddings + assert self.task_config.max_words <= decoder_config.max_target_embeddings + assert self.task_config.max_frames <= visual_config.max_position_embeddings + assert self.task_config.max_words + self.task_config.max_frames <= cross_config.max_position_embeddings + + self._stage_one = True + self._stage_two = False + + if 
check_attr('stage_two', self.task_config): + self._stage_one = False + self._stage_two = self.task_config.stage_two + show_log(task_config, "Stage-One:{}, Stage-Two:{}".format(self._stage_one, self._stage_two)) + + self.train_sim_after_cross = False + if self._stage_one and check_attr('train_sim_after_cross', self.task_config): + self.train_sim_after_cross = True + show_log(task_config, "Test retrieval after cross encoder.") + + # Text Encoder ===> + bert_config = update_attr("bert_config", bert_config, "num_hidden_layers", + self.task_config, "text_num_hidden_layers") + # print('=================The bert config:==========/n',bert_config) + # print('=================The task config:==========/n',self.task_config) + self.bert = BertModel(bert_config) + bert_word_embeddings_weight = self.bert.embeddings.word_embeddings.weight + bert_position_embeddings_weight = self.bert.embeddings.position_embeddings.weight + # <=== End of Text Encoder + + # Video Encoder ===> + visual_config = update_attr("visual_config", visual_config, "num_hidden_layers", + self.task_config, "visual_num_hidden_layers") + self.visual = VisualModel(visual_config) + visual_word_embeddings_weight = self.visual.embeddings.word_embeddings.weight + # <=== End of Video Encoder + + if self._stage_one is False or self.train_sim_after_cross: + # Cross Encoder ===> + cross_config = update_attr("cross_config", cross_config, "num_hidden_layers", + self.task_config, "cross_num_hidden_layers") + self.cross = CrossModel(cross_config) + # <=== End of Cross Encoder + + if self.train_sim_after_cross is False: + # Decoder ===> + decoder_config = update_attr("decoder_config", decoder_config, "num_decoder_layers", + self.task_config, "decoder_num_hidden_layers") + self.decoder = DecoderModel(decoder_config, bert_word_embeddings_weight, bert_position_embeddings_weight) + # <=== End of Decoder + + if self.task_config.do_pretrain: + self.cls = BertOnlyMLMHead(bert_config, bert_word_embeddings_weight) + self.cls_visual = VisualOnlyMLMHead(visual_config, visual_word_embeddings_weight) + self.alm_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.similarity_dense = nn.Linear(bert_config.hidden_size, 1) + self.decoder_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.normalize_video = NormalizeVideo(task_config) + + mILNCELoss = MILNCELoss(batch_size=task_config.batch_size // task_config.n_gpu, n_pair=task_config.n_pair, ) + maxMarginRankingLoss = MaxMarginRankingLoss(margin=task_config.margin, + negative_weighting=task_config.negative_weighting, + batch_size=task_config.batch_size // task_config.n_gpu, + n_pair=task_config.n_pair, + hard_negative_rate=task_config.hard_negative_rate, ) + + if task_config.use_mil: + self.loss_fct = CrossEn() if self._stage_two else mILNCELoss + self._pretrain_sim_loss_fct = mILNCELoss + else: + self.loss_fct = CrossEn() if self._stage_two else maxMarginRankingLoss + self._pretrain_sim_loss_fct = maxMarginRankingLoss + + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids, attention_mask, video, video_mask=None, + pairs_masked_text=None, pairs_token_labels=None, masked_video=None, video_labels_index=None, + input_caption_ids=None, decoder_mask=None, output_caption_ids=None): + + input_ids = input_ids.view(-1, input_ids.shape[-1]) + token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + video = self.normalize_video(video) + + if input_caption_ids is 
not None: + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + sequence_output, visual_output = self.get_sequence_visual_output(input_ids, token_type_ids, attention_mask, + video, video_mask, shaped=True) + + if self.training: + loss = 0. + if self._stage_one: + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, + video_mask, shaped=True) + sim_loss = self.loss_fct(sim_matrix) + loss += sim_loss + + if self._stage_two: + if self.task_config.do_pretrain: + pairs_masked_text = pairs_masked_text.view(-1, pairs_masked_text.shape[-1]) + pairs_token_labels = pairs_token_labels.view(-1, pairs_token_labels.shape[-1]) + + masked_video = self.normalize_video(masked_video) + video_labels_index = video_labels_index.view(-1, video_labels_index.shape[-1]) + + sequence_output_alm, visual_output_alm = self.get_sequence_visual_output(pairs_masked_text, token_type_ids, + attention_mask, masked_video, video_mask, shaped=True) + + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output_alm, visual_output_alm, attention_mask, video_mask) + sequence_cross_output, visual_cross_output = torch.split(cross_output, [attention_mask.size(-1), video_mask.size(-1)], dim=1) + + alm_loss = self._calculate_mlm_loss(sequence_cross_output, pairs_token_labels) + loss += alm_loss + + nce_loss = self._calculate_mfm_loss(visual_cross_output, video, video_mask, video_labels_index) + loss += nce_loss + + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, video_mask, + shaped=True, _pretrain_joint=True) + sim_loss_joint = self._pretrain_sim_loss_fct(sim_matrix) + loss += sim_loss_joint + + if (input_caption_ids is not None) and \ + (self.task_config.do_pretrain + or (self.task_config.do_pretrain is False and self.task_config.task_type == "caption")): + if self.task_config.do_pretrain: + decoder_scores, res_tuples = self._get_decoder_score(sequence_output_alm, visual_output_alm, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + elif self.task_config.task_type == "caption": + decoder_scores, res_tuples = self._get_decoder_score(sequence_output, visual_output, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + else: + raise NotImplementedError + + output_caption_ids = output_caption_ids.view(-1, output_caption_ids.shape[-1]) + decoder_loss = self.decoder_loss_fct(decoder_scores.view(-1, self.bert_config.vocab_size), output_caption_ids.view(-1)) + loss += decoder_loss + + if self.task_config.do_pretrain or self.task_config.task_type == "retrieval": + if self.task_config.do_pretrain: + sim_matrix_text_visual = self.get_similarity_logits(sequence_output_alm, visual_output_alm, + attention_mask, video_mask, shaped=True) + elif self.task_config.task_type == "retrieval": + sim_matrix_text_visual = self.get_similarity_logits(sequence_output, visual_output, + attention_mask, video_mask, shaped=True) + else: + raise NotImplementedError + + sim_loss_text_visual = self.loss_fct(sim_matrix_text_visual) + loss += sim_loss_text_visual + + return loss + else: + return None + + def _calculate_mlm_loss(self, sequence_output_alm, pairs_token_labels): + alm_scores = self.cls(sequence_output_alm) + alm_loss = self.alm_loss_fct(alm_scores.view(-1, self.bert_config.vocab_size), pairs_token_labels.view(-1)) + return alm_loss + + def _calculate_mfm_loss(self, visual_output_alm, video, 
video_mask, video_labels_index): + afm_scores = self.cls_visual(visual_output_alm) + afm_scores_tr = afm_scores.view(-1, afm_scores.shape[-1]) + + video_tr = video.permute(2, 0, 1) + video_tr = video_tr.view(video_tr.shape[0], -1) + + logits_matrix = torch.mm(afm_scores_tr, video_tr) + video_mask_float = video_mask.to(dtype=torch.float) + mask_matrix = torch.mm(video_mask_float.view(-1, 1), video_mask_float.view(1, -1)) + masked_logits = logits_matrix + (1. - mask_matrix) * -1e8 + + logpt = F.log_softmax(masked_logits, dim=-1) + logpt = torch.diag(logpt) + nce_loss = -logpt + + video_labels_index_mask = (video_labels_index != self.ignore_video_index) + nce_loss = nce_loss.masked_select(video_labels_index_mask.view(-1)) + nce_loss = nce_loss.mean() + return nce_loss + + def get_sequence_visual_output(self, input_ids, token_type_ids, attention_mask, video, video_mask, shaped=False): + if shaped is False: + input_ids = input_ids.view(-1, input_ids.shape[-1]) + token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + video = self.normalize_video(video) + encoded_layers, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True) + sequence_output = encoded_layers[-1] + + visual_layers, _ = self.visual(video, video_mask, output_all_encoded_layers=True) + visual_output = visual_layers[-1] + + return sequence_output, visual_output + + def _get_cross_output(self, sequence_output, visual_output, attention_mask, video_mask): + concat_features = torch.cat((sequence_output, visual_output), dim=1) # concatenate tokens and frames + concat_mask = torch.cat((attention_mask, video_mask), dim=1) + text_type_ = torch.zeros_like(attention_mask) + video_type_ = torch.ones_like(video_mask) + concat_type = torch.cat((text_type_, video_type_), dim=1) + + cross_layers, pooled_output = self.cross(concat_features, concat_type, concat_mask, output_all_encoded_layers=True) + cross_output = cross_layers[-1] + + return cross_output, pooled_output, concat_mask + + def _mean_pooling_for_similarity(self, sequence_output, visual_output, attention_mask, video_mask): + attention_mask_un = attention_mask.to(dtype=torch.float).unsqueeze(-1) + attention_mask_un[:, 0, :] = 0. + sequence_output = sequence_output * attention_mask_un + text_out = torch.sum(sequence_output, dim=1) / torch.sum(attention_mask_un, dim=1, dtype=torch.float) + + video_mask_un = video_mask.to(dtype=torch.float).unsqueeze(-1) + visual_output = visual_output * video_mask_un + video_mask_un_sum = torch.sum(video_mask_un, dim=1, dtype=torch.float) + video_mask_un_sum[video_mask_un_sum == 0.] = 1.
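+ # clamping zero-sum mask rows to 1 above avoids a divide-by-zero for fully padded videos; their masked visual_output rows are all-zeros, so the pooled vector stays zero either way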
+ video_out = torch.sum(visual_output, dim=1) / video_mask_un_sum + + return text_out, video_out + + def _cross_similarity(self, sequence_output, visual_output, attention_mask, video_mask): + b_text, s_text, h_text = sequence_output.size() + b_visual, s_visual, h_visual = visual_output.size() + + retrieve_logits_list = [] + step_size = 5 + + split_size = [step_size] * (b_text // step_size) + release_size = b_text - sum(split_size) + if release_size > 0: + split_size += [release_size] + + sequence_output_splits = torch.split(sequence_output, split_size, dim=0) + attention_mask_splits = torch.split(attention_mask, split_size, dim=0) + for i in range(len(split_size)): + sequence_output_row = sequence_output_splits[i] + attention_mask_row = attention_mask_splits[i] + sequence_output_l = sequence_output_row.unsqueeze(1).repeat(1, b_visual, 1, 1) + sequence_output_l = sequence_output_l.view(-1, s_text, h_text) + attention_mask_l = attention_mask_row.unsqueeze(1).repeat(1, b_visual, 1) + attention_mask_l = attention_mask_l.view(-1, s_text) + + step_truth = sequence_output_row.size(0) + visual_output_r = visual_output.unsqueeze(0).repeat(step_truth, 1, 1, 1) + visual_output_r = visual_output_r.view(-1, s_visual, h_visual) + video_mask_r = video_mask.unsqueeze(0).repeat(step_truth, 1, 1) + video_mask_r = video_mask_r.view(-1, s_visual) + + cross_output, pooled_output, concat_mask = \ + self._get_cross_output(sequence_output_l, visual_output_r, attention_mask_l, video_mask_r) + retrieve_logits_row = self.similarity_dense(pooled_output).squeeze(-1).view(step_truth, b_visual) + + retrieve_logits_list.append(retrieve_logits_row) + retrieve_logits = torch.cat(retrieve_logits_list, dim=0) + return retrieve_logits + + def get_similarity_logits(self, sequence_output, visual_output, attention_mask, video_mask, shaped=False, _pretrain_joint=False): + if shaped is False: + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + if (self._stage_two and _pretrain_joint is False) or self.train_sim_after_cross: + retrieve_logits = self._cross_similarity(sequence_output, visual_output, attention_mask, video_mask) + else: + text_out, video_out = self._mean_pooling_for_similarity(sequence_output, visual_output, attention_mask, video_mask) + if self.task_config.use_mil is False: + text_out = F.normalize(text_out, dim=-1) + video_out = F.normalize(video_out, dim=-1) + retrieve_logits = torch.matmul(text_out, video_out.t()) + + return retrieve_logits + + def _get_decoder_score(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, shaped=False): + + if shaped is False: + input_ids = input_ids.view(-1, input_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + res_tuples = () + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output, visual_output, attention_mask, video_mask) + decoder_scores = self.decoder(input_caption_ids, encoder_outs=cross_output, answer_mask=decoder_mask, encoder_mask=concat_mask) + + return decoder_scores, res_tuples + + def decoder_caption(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, + shaped=False, get_logits=False): + if shaped is False: + input_ids 
= input_ids.view(-1, input_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + decoder_scores, _ = self._get_decoder_score(sequence_output, visual_output, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + + if get_logits: + return decoder_scores + + _, decoder_scores_result = torch.max(decoder_scores, -1) + + return decoder_scores_result \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/module_bert.py b/anet_clip/backup/pdvc/modules/module_bert.py new file mode 100644 index 0000000000000000000000000000000000000000..aa376657fdf271f11978379665a67897c2cc5943 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_bert.py @@ -0,0 +1,447 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz", +} + +CONFIG_NAME = 'bert_config.json' +WEIGHTS_NAME = 'pytorch_model.bin' + + +class BertConfig(PretrainedConfig): + """Configuration class to store the configuration of a `BertModel`. 
+ """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Encoder Representations from Transformers"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary (see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1].
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controlled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated with the first token of the + input (`[CLS]`) to train on the Next-Sentence task (see BERT's paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + def __init__(self, config): + super(BertModel, self).__init__(config) + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_weights) + + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is simpler than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely.
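+ # e.g. an attention_mask row [1, 1, 0] becomes [0.0, 0.0, -10000.0] after the two lines below, + # so padded key positions receive ~zero probability from every softmax in the encoder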
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/module_cross.py b/anet_clip/backup/pdvc/modules/module_cross.py new file mode 100644 index 0000000000000000000000000000000000000000..8ff41910a2c62e1c79ab3f843bef3c54171bb026 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_cross.py @@ -0,0 +1,394 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'cross_config.json' +WEIGHTS_NAME = 'cross_pytorch_model.bin' + + +class CrossConfig(PretrainedConfig): + """Configuration class to store the configuration of a `CrossModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs CrossConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CrossModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `CrossModel`. + initializer_range: The standard deviation of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int) " + "or the path to a pretrained model config file (str)") + + +class CrossEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. + """ + def __init__(self, config): + super(CrossEmbeddings, self).__init__() + + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, concat_embeddings, concat_type=None): + + batch_size, seq_length = concat_embeddings.size(0), concat_embeddings.size(1) + if concat_type is None: + concat_type = torch.zeros(batch_size, seq_length, dtype=torch.long, device=concat_embeddings.device) + + position_ids = torch.arange(seq_length, dtype=torch.long, device=concat_embeddings.device) + position_ids = position_ids.unsqueeze(0).expand(concat_embeddings.size(0), -1) + + token_type_embeddings = self.token_type_embeddings(concat_type) + position_embeddings = self.position_embeddings(position_ids) + + embeddings = concat_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class CrossSelfAttention(nn.Module): + def __init__(self, config): + super(CrossSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout
= nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in CrossModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class CrossSelfOutput(nn.Module): + def __init__(self, config): + super(CrossSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossAttention(nn.Module): + def __init__(self, config): + super(CrossAttention, self).__init__() + self.self = CrossSelfAttention(config) + self.output = CrossSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class CrossIntermediate(nn.Module): + def __init__(self, config): + super(CrossIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class CrossOutput(nn.Module): + def __init__(self, config): + super(CrossOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossLayer(nn.Module): + def __init__(self, config): 
+ super(CrossLayer, self).__init__() + self.attention = CrossAttention(config) + self.intermediate = CrossIntermediate(config) + self.output = CrossOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class CrossEncoder(nn.Module): + def __init__(self, config): + super(CrossEncoder, self).__init__() + layer = CrossLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class CrossPooler(nn.Module): + def __init__(self, config): + super(CrossPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class CrossPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(CrossPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class CrossLMPredictionHead(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossLMPredictionHead, self).__init__() + self.transform = CrossPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
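+ # nn.Linear(in=hidden_size, out=vocab_size) stores its weight as (vocab_size, hidden_size), the same shape as the embedding table, so the weights can be tied by direct assignment below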
+ self.decoder = nn.Linear(cross_model_embedding_weights.size(1), + cross_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = cross_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(cross_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class CrossOnlyMLMHead(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossOnlyMLMHead, self).__init__() + self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class CrossOnlyNSPHead(nn.Module): + def __init__(self, config): + super(CrossOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class CrossPreTrainingHeads(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossPreTrainingHeads, self).__init__() + self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class CrossModel(PreTrainedModel): + def __init__(self, config): + super(CrossModel, self).__init__(config) + self.embeddings = CrossEmbeddings(config) + self.encoder = CrossEncoder(config) + self.pooler = CrossPooler(config) + self.apply(self.init_weights) + + def forward(self, concat_input, concat_type=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones(concat_input.size(0), concat_input.size(1)) + if concat_type is None: + concat_type = torch.zeros_like(attention_mask) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
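+ # same additive-mask construction as in BertModel.forward in module_bert.py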
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(concat_input, concat_type) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output diff --git a/anet_clip/backup/pdvc/modules/module_decoder.py b/anet_clip/backup/pdvc/modules/module_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..25622d1e4c0e9a0d19fe2b4986f7267ba1526823 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_decoder.py @@ -0,0 +1,406 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil +import numpy as np + +import torch +from torch import nn +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'decoder_config.json' +WEIGHTS_NAME = 'decoder_pytorch_model.bin' + + +class DecoderConfig(PretrainedConfig): + """Configuration class to store the configuration of a `DecoderModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + type_vocab_size=2, + initializer_range=0.02, + max_target_embeddings=128, + num_decoder_layers=1): + """Constructs DecoderConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `DecoderModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `DecoderModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + max_target_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + num_decoder_layers: + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.max_target_embeddings = max_target_embeddings + self.num_decoder_layers = num_decoder_layers + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + 
return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(decoder_model_embedding_weights.size(1), + decoder_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = decoder_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(decoder_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, decoder_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + +class MultiHeadAttention(nn.Module): + ''' Multi-Head Attention module ''' + + def __init__(self, config): + super(MultiHeadAttention, self).__init__() + + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, q, k, v, attention_mask): + mixed_query_layer = self.query(q) + mixed_key_layer = self.key(k) + mixed_value_layer = self.value(v) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
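+ # unlike the self-attention modules above, this variant also returns the masked pre-softmax attention_scores, which DecoderLayer surfaces as dec_att_scores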
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + + return context_layer, attention_scores + +class PositionwiseFeedForward(nn.Module): + ''' A two-feed-forward-layer module ''' + + def __init__(self, d_in, d_hid, dropout=0.1): + super().__init__() + self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise + self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise + self.layer_norm = nn.LayerNorm(d_in) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + residual = x + output = x.transpose(1, 2) + output = self.w_2(ACT2FN["gelu"](self.w_1(output))) + output = output.transpose(1, 2) + output = self.dropout(output) + output = self.layer_norm(output + residual) + return output + +class DecoderAttention(nn.Module): + def __init__(self, config): + super(DecoderAttention, self).__init__() + self.att = MultiHeadAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, q, k, v, attention_mask): + att_output, attention_probs = self.att(q, k, v, attention_mask) + attention_output = self.output(att_output, q) + return attention_output, attention_probs + +class DecoderLayer(nn.Module): + def __init__(self, config): + super(DecoderLayer, self).__init__() + self.slf_attn = DecoderAttention(config) + self.enc_attn = DecoderAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None): + slf_output, _ = self.slf_attn(dec_input, dec_input, dec_input, slf_attn_mask) + dec_output, dec_att_scores = self.enc_attn(slf_output, enc_output, enc_output, dec_enc_attn_mask) + intermediate_output = self.intermediate(dec_output) + dec_output = self.output(intermediate_output, dec_output) + return dec_output, dec_att_scores + +class DecoderEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_target_embeddings, config.hidden_size) + self.word_embeddings.weight = decoder_word_embeddings_weight + self.position_embeddings.weight = decoder_position_embeddings_weight + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + + embeddings = words_embeddings + position_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class Decoder(nn.Module): + def __init__(self, config): + super(Decoder, self).__init__() + layer = DecoderLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_decoder_layers)]) + + def forward(self, hidden_states, encoder_outs, self_attn_mask, attention_mask, output_all_encoded_layers=False): + dec_att_scores = None + all_encoder_layers = [] + all_dec_att_probs = [] + for layer_module in self.layer: + hidden_states, dec_att_scores = layer_module(hidden_states, encoder_outs, self_attn_mask, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + return all_encoder_layers, all_dec_att_probs + +class DecoderClassifier(nn.Module): + def __init__(self, config, embedding_weights): + super(DecoderClassifier, self).__init__() + self.cls = BertOnlyMLMHead(config, embedding_weights) + + def forward(self, hidden_states): + cls_scores = self.cls(hidden_states) + return cls_scores + +class DecoderModel(PreTrainedModel): + + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + final_norm (bool, optional): apply layer norm to the output of the + final decoder layer (default: True). 
+ """ + + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderModel, self).__init__(config) + self.config = config + self.max_target_length = config.max_target_embeddings + self.embeddings = DecoderEmbeddings(config, decoder_word_embeddings_weight, decoder_position_embeddings_weight) + self.decoder = Decoder(config) + self.classifier = DecoderClassifier(config, decoder_word_embeddings_weight) + self.apply(self.init_weights) + + def forward(self, input_ids, encoder_outs=None, answer_mask=None, encoder_mask=None): + """ + Args: + input_ids (LongTensor): previous decoder outputs of shape `(batch, tgt_len)`, for input feeding/teacher forcing + encoder_outs (Tensor, optional): output from the encoder, used for encoder-side attention + + Returns: + tuple: + - the last decoder layer's output of shape `(batch, tgt_len, vocab)` + - the last decoder layer's attention weights of shape `(batch, tgt_len, src_len)` + """ + embedding_output = self.embeddings(input_ids) + + extended_encoder_mask = encoder_mask.unsqueeze(1).unsqueeze(2) # b x 1 x 1 x ls + extended_encoder_mask = extended_encoder_mask.to(dtype=self.dtype) # fp16 compatibility + extended_encoder_mask = (1.0 - extended_encoder_mask) * -10000.0 + + extended_answer_mask = answer_mask.unsqueeze(1).unsqueeze(2) + extended_answer_mask = extended_answer_mask.to(dtype=self.dtype) # fp16 compatibility + + sz_b, len_s, _ = embedding_output.size() + subsequent_mask = torch.triu(torch.ones((len_s, len_s), device=embedding_output.device, dtype=embedding_output.dtype), diagonal=1) + self_attn_mask = subsequent_mask.unsqueeze(0).expand(sz_b, -1, -1).unsqueeze(1) # b x 1 x ls x ls + slf_attn_mask = ((1.0 - extended_answer_mask) + self_attn_mask).gt(0).to(dtype=self.dtype) + self_attn_mask = slf_attn_mask * -10000.0 + + decoded_layers, dec_att_scores = self.decoder(embedding_output, + encoder_outs, + self_attn_mask, + extended_encoder_mask, + ) + sequence_output = decoded_layers[-1] + cls_scores = self.classifier(sequence_output) + + return cls_scores diff --git a/anet_clip/backup/pdvc/modules/module_visual.py b/anet_clip/backup/pdvc/modules/module_visual.py new file mode 100644 index 0000000000000000000000000000000000000000..b9a43f8a74c1e5e020c8b4daec33d7adb5d3b840 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_visual.py @@ -0,0 +1,425 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'visual_config.json' +WEIGHTS_NAME = 'visual_pytorch_model.bin' + + +class VisualConfig(PretrainedConfig): + """Configuration class to store the configuration of a `VisualModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file=4096, + hidden_size=768, + num_hidden_layers=3, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + initializer_range=0.02): + """Constructs VisualConfig. + + Args: + vocab_size_or_config_json_file: Size of the encoder layers and the pooler layer. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class VisualEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(VisualEmbeddings, self).__init__() + + self.word_embeddings = nn.Linear(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_embeddings): + seq_length = input_embeddings.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_embeddings.device) + position_ids = position_ids.unsqueeze(0).expand(input_embeddings.size(0), -1) + + words_embeddings = self.word_embeddings(input_embeddings) + # words_embeddings = self.transform_act_fn(words_embeddings) + + position_embeddings = self.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class VisualSelfAttention(nn.Module): + def __init__(self, config): + super(VisualSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in VisualModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class VisualSelfOutput(nn.Module): + def __init__(self, config): + super(VisualSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualAttention(nn.Module): + def __init__(self, config): + super(VisualAttention, self).__init__() + self.self = VisualSelfAttention(config) + self.output = VisualSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class VisualIntermediate(nn.Module): + def __init__(self, config): + super(VisualIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class VisualOutput(nn.Module): + def __init__(self, config): + super(VisualOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualLayer(nn.Module): + def __init__(self, config): + super(VisualLayer, self).__init__() + self.attention = VisualAttention(config) + self.intermediate = VisualIntermediate(config) + self.output = VisualOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class VisualEncoder(nn.Module): + def __init__(self, config): + super(VisualEncoder, self).__init__() + layer = VisualLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class VisualPooler(nn.Module): + def __init__(self, config): + super(VisualPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + 
self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class VisualPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(VisualPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class VisualLMPredictionHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualLMPredictionHead, self).__init__() + self.transform = VisualPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.weight = visual_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(visual_model_embedding_weights.size(1))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = hidden_states.matmul(self.weight) + self.bias + return hidden_states + + +class VisualOnlyMLMHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualOnlyMLMHead, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class VisualOnlyNSPHead(nn.Module): + def __init__(self, config): + super(VisualOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class VisualPreTrainingHeads(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualPreTrainingHeads, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class VisualModel(PreTrainedModel): + """Visual model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a VisualConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controlled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for Visual-base, 24 for Visual-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated to the first character of the + input (`CLS`) to train on the Next-Sentence task (see the BERT paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + + config = modeling.VisualConfig(vocab_size_or_config_json_file=4096, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.VisualModel(config=config) + all_encoder_layers, pooled_output = model(video, video_mask) + ``` + """ + def __init__(self, config): + super(VisualModel, self).__init__(config) + self.embeddings = VisualEmbeddings(config) + self.encoder = VisualEncoder(config) + self.pooler = VisualPooler(config) + self.apply(self.init_weights) + + def forward(self, video, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones(video.size(0), video.size(1)) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is simpler than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely.
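The additive-mask trick those comments describe is easy to see with concrete numbers: attended positions become 0.0 and masked ones -10000.0, so the softmax drives the latter to effectively zero weight. A tiny standalone demo (the actual conversion follows below):

```python
# Sketch: 1 -> 0.0 (keep), 0 -> -10000.0 (drop) before the softmax.
import torch

attention_mask = torch.tensor([[1, 1, 1], [1, 1, 0]])
extended = attention_mask[:, None, None, :].float()   # (b, 1, 1, seq)
extended = (1.0 - extended) * -10000.0
print(extended.squeeze())
# tensor([[    -0.,     -0.,     -0.],
#         [    -0.,     -0., -10000.]])
```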
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(video) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/optimization.py b/anet_clip/backup/pdvc/modules/optimization.py new file mode 100644 index 0000000000000000000000000000000000000000..264c57c7d8f213004b4ee82a8861e0ae6103c906 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/optimization.py @@ -0,0 +1,168 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch optimization for BERT model.""" + +import math +import torch +from torch.optim import Optimizer +from torch.optim.optimizer import required +from torch.nn.utils import clip_grad_norm_ +import logging + +logger = logging.getLogger(__name__) + +def warmup_cosine(x, warmup=0.002): + if x < warmup: + return x/warmup + return 0.5 * (1.0 + math.cos(math.pi * x)) # math.cos, not torch.cos: x is a plain Python float here + +def warmup_constant(x, warmup=0.002): + """ Linearly increases learning rate over `warmup`*`t_total` (as provided to BertAdam) training steps. + Learning rate is 1. afterwards. """ + if x < warmup: + return x/warmup + return 1.0 + +def warmup_linear(x, warmup=0.002): + """ Specifies a triangular learning rate schedule where peak is reached at `warmup`*`t_total`-th (as provided to BertAdam) training step. + After `t_total`-th training step, learning rate is zero. """ + if x < warmup: + return x/warmup + return max((x-1.)/(warmup-1.), 0) + +SCHEDULES = { + 'warmup_cosine': warmup_cosine, + 'warmup_constant': warmup_constant, + 'warmup_linear': warmup_linear, +} + + +class BertAdam(Optimizer): + """Implements BERT version of Adam algorithm with weight decay fix. + Params: + lr: learning rate + warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1 + t_total: total number of training steps for the learning + rate schedule, -1 means constant learning rate. Default: -1 + schedule: schedule to use for the warmup (see above). Default: 'warmup_linear' + b1: Adam's b1. Default: 0.9 + b2: Adam's b2. Default: 0.999 + e: Adam's epsilon. Default: 1e-6 + weight_decay: Weight decay. Default: 0.01 + max_grad_norm: Maximum norm for the gradients (-1 means no clipping).
Default: 1.0 + """ + def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear', + b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, + max_grad_norm=1.0): + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr)) + if schedule not in SCHEDULES: + raise ValueError("Invalid schedule parameter: {}".format(schedule)) + if not 0.0 <= warmup < 1.0 and not warmup == -1: + raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup)) + if not 0.0 <= b1 < 1.0: + raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1)) + if not 0.0 <= b2 < 1.0: + raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2)) + if not e >= 0.0: + raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e)) + defaults = dict(lr=lr, schedule=schedule, warmup=warmup, t_total=t_total, + b1=b1, b2=b2, e=e, weight_decay=weight_decay, + max_grad_norm=max_grad_norm) + super(BertAdam, self).__init__(params, defaults) + + def get_lr(self): + lr = [] + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + state = self.state[p] + if len(state) == 0: + return [0] + if group['t_total'] != -1: + schedule_fct = SCHEDULES[group['schedule']] + lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup']) + else: + lr_scheduled = group['lr'] + lr.append(lr_scheduled) + return lr + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['next_m'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['next_v'] = torch.zeros_like(p.data) + + next_m, next_v = state['next_m'], state['next_v'] + beta1, beta2 = group['b1'], group['b2'] + + # Add grad clipping + if group['max_grad_norm'] > 0: + clip_grad_norm_(p, group['max_grad_norm']) + + # Decay the first and second moment running average coefficient + # In-place operations to update the averages at the same time + # next_m.mul_(beta1).add_(1 - beta1, grad) --> pytorch 1.7 + next_m.mul_(beta1).add_(grad, alpha=1 - beta1) + # next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad) --> pytorch 1.7 + next_v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + update = next_m / (next_v.sqrt() + group['e']) + + # Just adding the square of the weights to the loss function is *not* + # the correct way of using L2 regularization/weight decay with Adam, + # since that will interact with the m and v parameters in strange ways. + # + # Instead we want to decay the weights in a manner that doesn't interact + # with the m/v parameters. This is equivalent to adding the square + # of the weights to the loss with plain (non-momentum) SGD. 
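The comment above is the AdamW-style argument: decay the weights inside the update rather than through the loss, so the decay does not mix into the first/second-moment statistics. Condensed to numbers (a sketch, not the optimizer's exact code path, which continues below):

```python
# Sketch: decoupled weight decay joins the Adam direction, then one scaled step.
import torch

p = torch.ones(3)                        # parameter
adam_dir = torch.full((3,), 0.1)         # stands in for next_m / (next_v.sqrt() + eps)
weight_decay, lr_scheduled = 0.01, 1e-3

update = adam_dir + weight_decay * p     # decay enters the update directly
p = p - lr_scheduled * update            # mirrors p.data.add_(-update_with_lr)
print(p)                                 # tensor([0.9999, 0.9999, 0.9999])
```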
+ if group['weight_decay'] > 0.0: + update += group['weight_decay'] * p.data + + if group['t_total'] != -1: + schedule_fct = SCHEDULES[group['schedule']] + progress = state['step']/group['t_total'] + lr_scheduled = group['lr'] * schedule_fct(progress, group['warmup']) + else: + lr_scheduled = group['lr'] + + update_with_lr = lr_scheduled * update + p.data.add_(-update_with_lr) + + state['step'] += 1 + + return loss \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/tokenization.py b/anet_clip/backup/pdvc/modules/tokenization.py new file mode 100644 index 0000000000000000000000000000000000000000..183c81000f82aae59295f8d8572b6bcf67891790 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/tokenization.py @@ -0,0 +1,408 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import unicodedata +import os +import sys +import logging + +from .file_utils import cached_path + +logger = logging.getLogger(__name__) +PRETRAINED_VOCAB_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", +} +PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = { + 'bert-base-uncased': 512, + 'bert-large-uncased': 512, + 'bert-base-cased': 512, + 'bert-large-cased': 512, + 'bert-base-multilingual-uncased': 512, + 'bert-base-multilingual-cased': 512, + 'bert-base-chinese': 512, +} +VOCAB_NAME = 'vocab.txt' + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting + WordPiece.""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")): + if not 
os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + if token not in self.vocab: + ids.append(self.vocab["[UNK]"]) + logger.error("Cannot find token '{}' in vocab. Using [UNK] instead".format(token)) + else: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + raise ValueError( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids into tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + @classmethod + def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedBertModel from a pre-trained model file. + Download and cache the pre-trained model file if needed. + """ + vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(vocab_file) is False: + if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP: + vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name] + else: + vocab_file = pretrained_model_name + if os.path.isdir(vocab_file): + vocab_file = os.path.join(vocab_file, VOCAB_NAME) + # redirect to the cache, if necessary + print(vocab_file) + try: + resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir) + except FileNotFoundError: + logger.error( + "Model name '{}' was not found. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + vocab_file)) + return None + if resolved_vocab_file == vocab_file: + logger.info("loading vocabulary file {}".format(vocab_file)) + else: + logger.info("loading vocabulary file {} from cache at {}".format( + vocab_file, resolved_vocab_file)) + if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP: + # if we're using a pretrained model, ensure the tokenizer won't index sequences longer + # than the number of positional embeddings + max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name] + kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len) + kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]") + + # Instantiate tokenizer. + tokenizer = cls(resolved_vocab_file, *inputs, **kwargs) + + return tokenizer + + def add_tokens(self, new_tokens, model): + """ + Add a list of new tokens to the tokenizer class.
If the new tokens are not in the + vocabulary, they are added to it with indices starting from length of the current vocabulary. + Args: + new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assign the index of the ``unk_token`` to them). + Returns: + Number of tokens added to the vocabulary. + Examples:: + # Let's see how to increase the vocabulary of Bert model and tokenizer + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) + print('We have added', num_added_toks, 'tokens') + model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. + """ + + to_add_tokens = [] + for token in new_tokens: + assert isinstance(token, str) + to_add_tokens.append(token) + # logger.info("Adding %s to the vocabulary", token) + + vocab = collections.OrderedDict() + for token in self.vocab.keys(): + vocab[token] = self.vocab[token] + for token in to_add_tokens: + vocab[token] = len(vocab) + self.vocab = self.wordpiece_tokenizer.vocab = vocab + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + + model.resize_token_embeddings(new_num_tokens=len(vocab)) + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
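`add_tokens` above grows the vocabulary by appending, so existing token ids never move and only the embedding matrix needs resizing. The core of that bookkeeping, reduced to a few lines before the `tokenize()` body resumes below (`model.resize_token_embeddings` is omitted here):

```python
# Sketch: append-only vocabulary growth, as in add_tokens above.
import collections

vocab = collections.OrderedDict([("[UNK]", 0), ("hello", 1)])
for token in ["new_tok1", "my_new-tok2"]:
    if token not in vocab:
        vocab[token] = len(vocab)      # next free id
print(vocab)
# OrderedDict([('[UNK]', 0), ('hello', 1), ('new_tok1', 2), ('my_new-tok2', 3)])
```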
+ text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. 
+ """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/anet_clip/backup/pdvc/modules/until_config.py b/anet_clip/backup/pdvc/modules/until_config.py new file mode 100644 index 0000000000000000000000000000000000000000..596c157aa23c82eb33c1fb2e07d9b006a52990e9 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/until_config.py @@ -0,0 +1,126 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import logging +import tarfile +import tempfile +import shutil +import torch +from .file_utils import cached_path + +logger = logging.getLogger(__name__) + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weight doesn't exsits. 
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/until_module.py b/anet_clip/backup/pdvc/modules/until_module.py new file mode 100644 index 0000000000000000000000000000000000000000..d550638157f8aeb2116a9cce022b2c563fd3491b --- /dev/null +++ b/anet_clip/backup/pdvc/modules/until_module.py @@ -0,0 +1,251 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import logging +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F +import math +from pdvc.modules.until_config import PretrainedConfig + +logger = logging.getLogger(__name__) + +def gelu(x): + """Implementation of the gelu activation function. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. 
+ """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + if not isinstance(config, PretrainedConfig): + raise ValueError( + "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + "To create a model from a Google pretrained model use " + "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + self.__class__.__name__, self.__class__.__name__ + )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). 
+ """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +################################## +###### LOSS FUNCTION ############# +################################## +class CrossEn(nn.Module): + def __init__(self,): + super(CrossEn, self).__init__() + + def forward(self, sim_matrix): + logpt = F.log_softmax(sim_matrix, dim=-1) + logpt = torch.diag(logpt) + nce_loss = -logpt + sim_loss = nce_loss.mean() + return sim_loss + +class MILNCELoss(nn.Module): + def __init__(self, batch_size=1, n_pair=1,): + super(MILNCELoss, self).__init__() + self.batch_size = batch_size + self.n_pair = n_pair + torch_v = float(".".join(torch.__version__.split(".")[:2])) + self.bool_dtype = torch.bool if torch_v >= 1.3 else torch.uint8 + + def forward(self, sim_matrix): + mm_mask = np.eye(self.batch_size) + mm_mask = np.kron(mm_mask, np.ones((self.n_pair, self.n_pair))) + mm_mask = torch.tensor(mm_mask).float().to(sim_matrix.device) + + from_text_matrix = sim_matrix + mm_mask * -1e12 + from_video_matrix = sim_matrix.transpose(1, 0) + + new_sim_matrix = torch.cat([from_video_matrix, from_text_matrix], dim=-1) + logpt = F.log_softmax(new_sim_matrix, dim=-1) + + mm_mask_logpt = torch.cat([mm_mask, torch.zeros_like(mm_mask)], dim=-1) + masked_logpt = logpt + (torch.ones_like(mm_mask_logpt) - mm_mask_logpt) * -1e12 + + new_logpt = -torch.logsumexp(masked_logpt, dim=-1) + + logpt_choice = torch.zeros_like(new_logpt) + mark_ind = torch.arange(self.batch_size).to(sim_matrix.device) * self.n_pair + (self.n_pair//2) + logpt_choice[mark_ind] = 1 + sim_loss = new_logpt.masked_select(logpt_choice.to(dtype=self.bool_dtype)).mean() + return sim_loss + +class MaxMarginRankingLoss(nn.Module): + def __init__(self, + margin=1.0, + negative_weighting=False, + batch_size=1, + n_pair=1, + hard_negative_rate=0.5, + ): + super(MaxMarginRankingLoss, self).__init__() + self.margin = margin + self.n_pair = n_pair + self.batch_size = batch_size + easy_negative_rate = 1 - hard_negative_rate + self.easy_negative_rate = easy_negative_rate + self.negative_weighting = negative_weighting + if n_pair > 1 and batch_size > 1: + alpha = easy_negative_rate / ((batch_size - 1) * (1 - easy_negative_rate)) + mm_mask = (1 - alpha) * np.eye(self.batch_size) + alpha + mm_mask = np.kron(mm_mask, np.ones((n_pair, n_pair))) + mm_mask = torch.tensor(mm_mask) * (batch_size * (1 - easy_negative_rate)) + self.mm_mask = mm_mask.float() + + def forward(self, x): + d = torch.diag(x) + max_margin = F.relu(self.margin + x - d.view(-1, 1)) + \ + F.relu(self.margin + x - d.view(1, -1)) + if self.negative_weighting and self.n_pair > 1 and self.batch_size > 1: + max_margin = max_margin * self.mm_mask.to(max_margin.device) + return max_margin.mean() \ No newline at end of file diff --git 
a/anet_clip/backup/pdvc/modules/visual-base/visual_config.json b/anet_clip/backup/pdvc/modules/visual-base/visual_config.json new file mode 100644 index 0000000000000000000000000000000000000000..324fcb6e7ba63166767adf9afa82324412247a48 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/visual-base/visual_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 1, + "vocab_size": 1024 +} diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..5f86c9097b3b6f4b7f50b9d70f7cd58b2f386871 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO @@ -0,0 +1,6 @@ +Metadata-Version: 2.1 +Name: MultiScaleDeformableAttention +Version: 1.0 +Summary: PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention +Home-page: https://github.com/fundamentalvision/Deformable-DETR +Author: Weijie Su diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc251e74aff93cae99a730109d3f696ef326b210 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt @@ -0,0 +1,13 @@ +setup.py +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +MultiScaleDeformableAttention.egg-info/PKG-INFO +MultiScaleDeformableAttention.egg-info/SOURCES.txt +MultiScaleDeformableAttention.egg-info/dependency_links.txt +MultiScaleDeformableAttention.egg-info/top_level.txt +functions/__init__.py +functions/ms_deform_attn_func.py +modules/__init__.py +modules/ms_deform_attn.py +modules/ms_deform_attn_for_caption.py \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..25d8f7790d14d04a74c6acec779aedb3688ef630 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt @@ -0,0 +1,3 @@ +MultiScaleDeformableAttention +functions +modules diff --git a/anet_clip/backup/pdvc/ops/__init__.py b/anet_clip/backup/pdvc/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ed3d8ddca46efead59543bfd2f1961790abdc96 Binary 
files /dev/null and b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c722836c6716e99f5a33542ebc2461e4540b9c0 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..affe1b85a7c92a8c1ecfca0d0b2c329ce77bf383 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5720c8c8f59f4168baf51ec63ba9c5f5e90d5abb998c0fbdd6170547d23a13 +size 7942000 diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # The compiled CUDA extension is optional; ms_deform_attn_core_pytorch below
+    # provides a pure-PyTorch fallback.
+    pass
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # Pure-PyTorch reference implementation, intended for debugging and testing;
+    # prefer the compiled CUDA version for speed.
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum_l H_l*W_l, M_: number of heads, D_: feature dim per head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of levels, P_: sampled key points per level
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert locations from range [0, 1] to [-1, 1] for grid_sample
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
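+        # Editorial shape sketch (assumed example values, not upstream code): with
+        # N_=2, M_=8, D_=32, Lq_=10, P_=4 and a level of shape H_=1, W_=100,
+        # value_l_ is (16, 32, 1, 100) and sampling_grid_l_ is (16, 10, 4, 2);
+        # F.grid_sample below then bilinearly reads one (x, y) location per
+        # query/point and returns sampling_value_l_ of shape (16, 32, 10, 4).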
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='border', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+
+    if return_value:
+        return torch.stack(sampling_value_list, dim=-2)
+    # (N_*M_, D_, Lq_, L_*P_) * (N_*M_, 1, Lq_, L_*P_) -> (N_*M_, D_, Lq_)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from .ms_deform_attn import MSDeformAttn
+from .ms_deform_attn_for_caption import MSDeformAttnCap
\ No newline at end of file
diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py
new file mode 100644
index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py
@@ -0,0 +1,126 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
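+
+    # Explanatory note (an editorial aside, not upstream code): with the weight
+    # zeroed, the initial sampling offset for head h and point p is just the bias,
+    # i.e. the first component of the L_inf-normalized direction
+    # (cos(2*pi*h/n_heads), sin(2*pi*h/n_heads)) scaled by (p + 1). For the
+    # default n_heads=8 those per-head unit offsets fall in {-1, 0, +1}, so heads
+    # start by probing both sides of (and exactly at) the reference point.
+    # attention_weights starts at zero, i.e. a uniform softmax over all
+    # n_levels * n_points sampled values.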
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # the CUDA kernel and grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
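+
+    # Explanatory note (an editorial aside, not upstream code): same directional
+    # bias scheme as MSDeformAttn, but grid_init is mean-centered over the point
+    # axis, so each head's initial sampling points straddle the reference location
+    # rather than extending to one side only. Unlike MSDeformAttn, the offset and
+    # weight projections here take a 2*d_model input (presumably a query
+    # concatenated with caption-side features), and forward() below always runs
+    # the pure-PyTorch path with return_value=True, returning the raw per-point
+    # sampled values instead of the attention-weighted aggregate.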
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N*n_heads, d_model//n_heads, Length_{query}, n_levels, n_points), per-point sampled values (return_value=True below)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps
new file mode 100644
index 0000000000000000000000000000000000000000..2bef29d420f02b4282644cba394698912212dab8
Binary files /dev/null and b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps differ
diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
new file mode 100644
index 0000000000000000000000000000000000000000..fd78ae63cd064bb569f9279931f2e0668833f50d
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
@@ -0,0 +1,4 @@
+# ninja log v5
+0 2930 1685020146224081877 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o 8f7db54445222f0
+0 10580 1685020153869972218 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o 91f10249ca524b9b
+0 13795
1685020157081510628 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o 3e48c35d2c631cee diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..9d156fb45877ed14f310b8ae1f889c048fe0fa2b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja @@ -0,0 +1,30 @@ +ninja_required_version = 1.3 +cxx = c++ +nvcc = /usr/local/cuda/bin/nvcc + +cflags = -pthread -B /home/liuhuabin/miniconda3/envs/PDVC/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 +cuda_cflags = -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o: compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o: cuda_compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o: compile 
/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp + + + + + diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o new file mode 100644 index 0000000000000000000000000000000000000000..d30f1ff54acc23e3e0f5ea22b3a8828fdd2c44b7 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59afa2abc476414b1faa6816920a93293fc9e71aa96d790c80760a879f5d0682 +size 1437672 diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..d9274a1b895a7c123eab8231e2e24c2ea6629581 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973f1d16162f782172da95253065226cd068f45430bbc1a8920929ffda09947d +size 920176 diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o new file mode 100644 index 0000000000000000000000000000000000000000..e771be34bcbacfa86a2e41f1728b9d0b2fef3a85 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad8100cd431dec4d7ef8dc5d144c90402c71b4b41a772e5f120c38b8fe9aa0e +size 10423896 diff --git a/anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg b/anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg new file mode 100644 index 0000000000000000000000000000000000000000..dc5bbc86e1f4304b490711416d30dbeecec3a2b8 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ad69121c719dc533912a5233ee2ba4d895fd745283dc122601f20b0da2a519 +size 2223428 diff --git a/anet_clip/backup/pdvc/ops/functions/__init__.py b/anet_clip/backup/pdvc/ops/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00b83c1e1d8810a77347e3d76609cdf347898186 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09ce00b445b4c8d76b027f013de6cb094dae82dc Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..222160988ac28f5eba55fe2acff1a6b176b3429b Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fc0981ca1144f3eb8a7166b570fb797f8004a16 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/ms_deform_attn_func.py b/anet_clip/backup/pdvc/ops/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # The compiled CUDA extension is optional; ms_deform_attn_core_pytorch below
+    # provides a pure-PyTorch fallback.
+    pass
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # Pure-PyTorch reference implementation, intended for debugging and testing;
+    # prefer the compiled CUDA version for speed.
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum_l H_l*W_l, M_: number of heads, D_: feature dim per head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of levels, P_: sampled key points per level
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert locations from range [0, 1] to [-1, 1] for grid_sample
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
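+        # Editorial shape sketch (assumed example values, not upstream code): with
+        # N_=2, M_=8, D_=32, Lq_=10, P_=4 and a level of shape H_=1, W_=100,
+        # value_l_ is (16, 32, 1, 100) and sampling_grid_l_ is (16, 10, 4, 2);
+        # F.grid_sample below then bilinearly reads one (x, y) location per
+        # query/point and returns sampling_value_l_ of shape (16, 32, 10, 4).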
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='border', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+
+    if return_value:
+        return torch.stack(sampling_value_list, dim=-2)
+    # (N_*M_, D_, Lq_, L_*P_) * (N_*M_, 1, Lq_, L_*P_) -> (N_*M_, D_, Lq_)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/anet_clip/backup/pdvc/ops/make.sh b/anet_clip/backup/pdvc/ops/make.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a7e4320108ecd2f02d1824505849850b0c69d319
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/make.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+python setup.py build install
diff --git a/anet_clip/backup/pdvc/ops/modules/__init__.py b/anet_clip/backup/pdvc/ops/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn import MSDeformAttn +from .ms_deform_attn_for_caption import MSDeformAttnCap \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd29db0d448db6cc3ebfcb499cb6105d2f745555 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc85ead761d81b2d819429824ee2393e9f50a6ae Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1de99e2b9ab1efc42b399837d8cfd7a09a3e2ef1 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12c1ccbe61ed8ca360ce969e012e60a89d05cece Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..671fa7d00552b0d0913bf502750b061574f7b3f2 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a23f676c0714c277a628441a7459d2724f62b61 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/ms_deform_attn.py b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
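+
+    # Explanatory note (an editorial aside, not upstream code): with the weight
+    # zeroed, the initial sampling offset for head h and point p is just the bias,
+    # i.e. the first component of the L_inf-normalized direction
+    # (cos(2*pi*h/n_heads), sin(2*pi*h/n_heads)) scaled by (p + 1). For the
+    # default n_heads=8 those per-head unit offsets fall in {-1, 0, +1}, so heads
+    # start by probing both sides of (and exactly at) the reference point.
+    # attention_weights starts at zero, i.e. a uniform softmax over all
+    # n_levels * n_points sampled values.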
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # the CUDA kernel and grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/anet_clip/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
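+
+    # Explanatory note (an editorial aside, not upstream code): same directional
+    # bias scheme as MSDeformAttn, but grid_init is mean-centered over the point
+    # axis, so each head's initial sampling points straddle the reference location
+    # rather than extending to one side only. Unlike MSDeformAttn, the offset and
+    # weight projections here take a 2*d_model input (presumably a query
+    # concatenated with caption-side features), and forward() below always runs
+    # the pure-PyTorch path with return_value=True, returning the raw per-point
+    # sampled values instead of the attention-weighted aggregate.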
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N*n_heads, d_model//n_heads, Length_{query}, n_levels, n_points), per-point sampled values (return_value=True below)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/anet_clip/backup/pdvc/ops/setup.py b/anet_clip/backup/pdvc/ops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0131bc21cf1b45b90fcf174e2c53e4c08e9c641
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/setup.py
@@ -0,0 +1,71 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+import os
+import glob
+
+import torch
+
+from torch.utils.cpp_extension import CUDA_HOME
+from torch.utils.cpp_extension import CppExtension
+from torch.utils.cpp_extension import CUDAExtension
+
+from setuptools import find_packages
+from setuptools import setup
+
+requirements = ["torch", "torchvision"]
+
+def get_extensions():
+    this_dir = os.path.dirname(os.path.abspath(__file__))
+    extensions_dir = os.path.join(this_dir, "src")
+
+    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
+    source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
+    source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
+
+    sources = main_file + source_cpu
+    extension = CppExtension
+    extra_compile_args = {"cxx": []}
+    define_macros = []
+
+    if torch.cuda.is_available() and CUDA_HOME is not None:
+        extension = CUDAExtension
+        sources += source_cuda
+        define_macros += [("WITH_CUDA", None)]
+        extra_compile_args["nvcc"] = [
+            "-DCUDA_HAS_FP16=1",
+            "-D__CUDA_NO_HALF_OPERATORS__",
+            "-D__CUDA_NO_HALF_CONVERSIONS__",
+            "-D__CUDA_NO_HALF2_OPERATORS__",
+        ]
+    else:
+        raise NotImplementedError('CUDA is not available')
+
+    sources = [os.path.join(extensions_dir, s) for s in sources]
+    include_dirs = [extensions_dir]
+    ext_modules = [
+        extension(
+            "MultiScaleDeformableAttention",
+            sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        )
+    ]
+    return ext_modules
+
+setup(
+    name="MultiScaleDeformableAttention",
+    version="1.0",
+    author="Weijie Su",
+    url="https://github.com/fundamentalvision/Deformable-DETR",
+    description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention",
+    packages=find_packages(exclude=("configs", "tests",)),
+    ext_modules=get_extensions(),
+    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
+)
diff --git a/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e1bf854de1f3860d20b6fef5c1a17817c268e70a
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
@@ -0,0 +1,41 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on the CPU");
+}
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on the CPU");
+}
+
diff --git a/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..81b7b58a3d9502bbb684dc84687a526dedf94cae
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
@@ -0,0 +1,33 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
+
diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d6d583647cce987196d5ad1968a8a365a379e774
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
@@ -0,0 +1,153 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+#include "cuda/ms_deform_im2col_cuda.cuh"
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
+
+    const int batch_n = im2col_step_;
+    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto columns = output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
+            ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                columns.data<scalar_t>());
+
+        }));
+    }
+
+    output = output.view({batch, num_query, num_heads*channels});
+
+    return output;
+}
+
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
"spatial_shapes tensor has to be contiguous"); + AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); + AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); + AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); + AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous"); + + AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); + AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); + AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); + AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); + AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); + AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor"); + + const int batch = value.size(0); + const int spatial_size = value.size(1); + const int num_heads = value.size(2); + const int channels = value.size(3); + + const int num_levels = spatial_shapes.size(0); + + const int num_query = sampling_loc.size(1); + const int num_point = sampling_loc.size(4); + + const int im2col_step_ = std::min(batch, im2col_step); + + AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); + + auto grad_value = at::zeros_like(value); + auto grad_sampling_loc = at::zeros_like(sampling_loc); + auto grad_attn_weight = at::zeros_like(attn_weight); + + const int batch_n = im2col_step_; + auto per_value_size = spatial_size * num_heads * channels; + auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; + auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; + auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); + + for (int n = 0; n < batch/im2col_step_; ++n) + { + auto grad_output_g = grad_output_n.select(0, n); + AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] { + ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(), + grad_output_g.data(), + value.data() + n * im2col_step_ * per_value_size, + spatial_shapes.data(), + level_start_index.data(), + sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, + attn_weight.data() + n * im2col_step_ * per_attn_weight_size, + batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, + grad_value.data() + n * im2col_step_ * per_value_size, + grad_sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, + grad_attn_weight.data() + n * im2col_step_ * per_attn_weight_size); + + })); + } + + return { + grad_value, grad_sampling_loc, grad_attn_weight + }; +} \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h new file mode 100644 index 0000000000000000000000000000000000000000..c7ae53f99c820ce6193b608ad344550348a0b42c --- /dev/null +++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h @@ -0,0 +1,30 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. 
diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7ae53f99c820ce6193b608ad344550348a0b42c
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
@@ -0,0 +1,30 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5635be7822e7cbfb8b5524185f213a9368a91dce
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
@@ -0,0 +1,1328 @@
+/*!
+**************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************
+* Modified from DCN (https://github.com/msracver/Deformable-ConvNets)
+* Copyright (c) 2018 Microsoft
+**************************************************************************
+*/
+
+#include <cstdio>
+#include <algorithm>
+#include <cstring>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+#include <THC/THCAtomics.cuh>
+
+// Grid-stride loop: spread the n work items evenly across all launched blocks.
+#define CUDA_KERNEL_LOOP(i, n)                          \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \
+      i < (n);                                          \
+      i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+inline int GET_BLOCKS(const int N, const int num_threads)
+{
+  return (N + num_threads - 1) / num_threads;
+}
+
+
+template <typename scalar_t>
+__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data,
+                                                   const int &height, const int &width, const int &nheads, const int &channels,
+                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset +
w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + } + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + + +template +__device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data, + const int &height, const int &width, const int &nheads, const int &channels, + const scalar_t &h, const scalar_t &w, const int &m, const int &c, + const scalar_t &top_grad, + const scalar_t &attn_weight, + scalar_t* &grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int h_low = floor(h); + const int w_low = floor(w); + const int h_high = h_low + 1; + const int w_high = w_low + 1; + + const scalar_t lh = h - h_low; + const scalar_t lw = w - w_low; + const scalar_t hh = 1 - lh, hw = 1 - lw; + + const int w_stride = nheads * channels; + const int h_stride = width * w_stride; + const int h_low_ptr_offset = h_low * h_stride; + const int h_high_ptr_offset = h_low_ptr_offset + h_stride; + const int w_low_ptr_offset = w_low * w_stride; + const int w_high_ptr_offset = w_low_ptr_offset + w_stride; + const int base_ptr = m * channels + c; + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + const scalar_t top_grad_value = top_grad * attn_weight; + scalar_t grad_h_weight = 0, grad_w_weight = 0; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + { + const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr; + v1 = bottom_data[ptr1]; + grad_h_weight -= hw * v1; + grad_w_weight -= hh * v1; + atomicAdd(grad_value+ptr1, w1*top_grad_value); + } + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + { + const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; + v2 = bottom_data[ptr2]; + grad_h_weight -= lw * v2; + grad_w_weight += hh * v2; + atomicAdd(grad_value+ptr2, w2*top_grad_value); + } + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + { + const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; + v3 = bottom_data[ptr3]; + grad_h_weight += hw * v3; + grad_w_weight -= lh * v3; + atomicAdd(grad_value+ptr3, w3*top_grad_value); + } + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + { + const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + grad_h_weight += lw * v4; + grad_w_weight += lh * v4; + atomicAdd(grad_value+ptr4, w4*top_grad_value); + } + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + *grad_attn_weight = top_grad * val; + *grad_sampling_loc = width * grad_w_weight * top_grad_value; + *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value; +} + + +template +__device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data, + const int &height, const int &width, const int &nheads, const int &channels, + const scalar_t &h, const scalar_t &w, const int &m, const int &c, + const scalar_t &top_grad, + const scalar_t &attn_weight, + scalar_t* &grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int h_low = floor(h); + const int w_low = floor(w); + const int h_high = h_low + 1; + const int w_high = w_low + 1; + + const scalar_t lh = h - h_low; + const scalar_t lw = w - w_low; + const scalar_t hh = 1 - lh, hw = 1 - lw; + + const int w_stride = nheads * channels; + const int h_stride = width * w_stride; + const int h_low_ptr_offset = h_low * h_stride; + const int h_high_ptr_offset = h_low_ptr_offset + h_stride; + const int w_low_ptr_offset = w_low * w_stride; + 
const int w_high_ptr_offset = w_low_ptr_offset + w_stride; + const int base_ptr = m * channels + c; + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + const scalar_t top_grad_value = top_grad * attn_weight; + scalar_t grad_h_weight = 0, grad_w_weight = 0; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + { + const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr; + v1 = bottom_data[ptr1]; + grad_h_weight -= hw * v1; + grad_w_weight -= hh * v1; + atomicAdd(grad_value+ptr1, w1*top_grad_value); + } + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + { + const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; + v2 = bottom_data[ptr2]; + grad_h_weight -= lw * v2; + grad_w_weight += hh * v2; + atomicAdd(grad_value+ptr2, w2*top_grad_value); + } + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + { + const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; + v3 = bottom_data[ptr3]; + grad_h_weight += hw * v3; + grad_w_weight -= lh * v3; + atomicAdd(grad_value+ptr3, w3*top_grad_value); + } + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + { + const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + grad_h_weight += lw * v4; + grad_w_weight += lh * v4; + atomicAdd(grad_value+ptr4, w4*top_grad_value); + } + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + atomicAdd(grad_attn_weight, top_grad * val); + atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value); + atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value); +} + + +template +__global__ void ms_deformable_im2col_gpu_kernel(const int n, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + scalar_t *data_col_ptr = data_col + index; + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + scalar_t col = 0; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride); + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, 
channels, h_im, w_im, m_col, c_col) * weight; + } + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + } + } + *data_col_ptr = col; + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + if (tid == 0) + { + scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0]; + int sid=2; + for (unsigned int tid = 1; tid < blockSize; ++tid) + { + _grad_w += cache_grad_sampling_loc[sid]; + _grad_h += cache_grad_sampling_loc[sid + 1]; + _grad_a += cache_grad_attn_weight[tid]; + sid += 2; + } + + + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + 
grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockSize/2; s>0; s>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + } + __syncthreads(); + } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += 
grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + if (tid == 0) + { + scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0]; + int sid=2; + for (unsigned int tid = 1; tid < blockDim.x; ++tid) + { + _grad_w += cache_grad_sampling_loc[sid]; + _grad_h += cache_grad_sampling_loc[sid + 1]; + _grad_a += cache_grad_attn_weight[tid]; + sid += 2; + } + + + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + 
} + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); 
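+        // Added note: each halving step folds the upper half of the shared-memory caches
+        // onto the lower half. In this kernel blockDim.x equals `channels`, which need not
+        // be a power of two, so the extra `tid + (s << 1) < spre` branch also folds in the
+        // odd leftover element whenever the previous active width `spre` is odd, keeping
+        // the sum complete. The __syncthreads() above publishes each round's partial sums
+        // before the next halving begins.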
+ } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + 
cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); + } + + if (tid == 0) + { + atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]); + atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]); + atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]); + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_gm(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear_gm( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + grad_sampling_loc, grad_attn_weight); + } + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +void ms_deformable_im2col_cuda(cudaStream_t stream, + const scalar_t* data_value, + const int64_t* data_spatial_shapes, + const int64_t* data_level_start_index, + const scalar_t* data_sampling_loc, + const 
scalar_t* data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t* data_col) +{ + const int num_kernels = batch_size * num_query * num_heads * channels; + const int num_actual_kernels = batch_size * num_query * num_heads * channels; + const int num_threads = CUDA_NUM_THREADS; + ms_deformable_im2col_gpu_kernel + <<>>( + num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, + batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); + } + +} + +template +void ms_deformable_col2im_cuda(cudaStream_t stream, + const scalar_t* grad_col, + const scalar_t* data_value, + const int64_t * data_spatial_shapes, + const int64_t * data_level_start_index, + const scalar_t * data_sampling_loc, + const scalar_t * data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t* grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int num_threads = (channels > CUDA_NUM_THREADS)?CUDA_NUM_THREADS:channels; + const int num_kernels = batch_size * num_query * num_heads * channels; + const int num_actual_kernels = batch_size * num_query * num_heads * channels; + if (channels > 1024) + { + if ((channels & 1023) == 0) + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + else + { + ms_deformable_col2im_gpu_kernel_gm + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + } + else{ + switch(channels) + { + case 1: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 2: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 4: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 8: + 
ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 16: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 32: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 64: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 128: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 256: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 512: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 1024: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + default: + if (channels < 64) + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + else + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + 
data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + } + } + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); + } + +} \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/src/ms_deform_attn.h b/anet_clip/backup/pdvc/ops/src/ms_deform_attn.h new file mode 100644 index 0000000000000000000000000000000000000000..ac0ef2ec25f7d0ee51ca2d807b159ddf85652017 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/src/ms_deform_attn.h @@ -0,0 +1,62 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +#pragma once + +#include "cpu/ms_deform_attn_cpu.h" + +#ifdef WITH_CUDA +#include "cuda/ms_deform_attn_cuda.h" +#endif + + +at::Tensor +ms_deform_attn_forward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const int im2col_step) +{ + if (value.type().is_cuda()) + { +#ifdef WITH_CUDA + return ms_deform_attn_cuda_forward( + value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +std::vector +ms_deform_attn_backward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const at::Tensor &grad_output, + const int im2col_step) +{ + if (value.type().is_cuda()) + { +#ifdef WITH_CUDA + return ms_deform_attn_cuda_backward( + value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + diff --git a/anet_clip/backup/pdvc/ops/src/vision.cpp b/anet_clip/backup/pdvc/ops/src/vision.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2201f63a51dca16d0b31148ed2c9e8e47ec15bdc --- /dev/null +++ b/anet_clip/backup/pdvc/ops/src/vision.cpp @@ -0,0 +1,16 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. 
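On the Python side, the two functions exported below are normally wrapped in a `torch.autograd.Function`; `test.py` imports exactly such a wrapper (`MSDeformAttnFunction` from `functions.ms_deform_attn_func`). A hedged sketch of that wrapper, assuming the compiled extension imports as `MultiScaleDeformableAttention` (an assumption here, not confirmed by this diff):

```python
import torch
from torch.autograd.function import once_differentiable

import MultiScaleDeformableAttention as MSDA  # assumed name of the compiled extension


class MSDeformAttnFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, value, spatial_shapes, level_start_index,
                sampling_loc, attn_weight, im2col_step):
        ctx.im2col_step = im2col_step
        output = MSDA.ms_deform_attn_forward(
            value, spatial_shapes, level_start_index,
            sampling_loc, attn_weight, ctx.im2col_step)
        ctx.save_for_backward(value, spatial_shapes, level_start_index,
                              sampling_loc, attn_weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        value, spatial_shapes, level_start_index, sampling_loc, attn_weight = ctx.saved_tensors
        grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward(
            value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
            grad_output.contiguous(), ctx.im2col_step)
        # no gradients flow to the integer index tensors or to im2col_step
        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
```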
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +#include "ms_deform_attn.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); + m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); +} diff --git a/anet_clip/backup/pdvc/ops/test.py b/anet_clip/backup/pdvc/ops/test.py new file mode 100644 index 0000000000000000000000000000000000000000..8dbf6d5547d131f01a8c5c28b76557bd27a9334b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/test.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import time +import torch +import torch.nn as nn +from torch.autograd import gradcheck + +from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +N, M, D = 1, 2, 2 +Lq, L, P = 2, 2, 2 +shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() +level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) +S = sum([(H*W).item() for H, W in shapes]) + + +torch.manual_seed(3) + + +@torch.no_grad() +def check_forward_equal_with_pytorch_double(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +@torch.no_grad() +def check_forward_equal_with_pytorch_float(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, 
attention_weights, im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): + + value = torch.rand(N, S, M, channels).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + func = MSDeformAttnFunction.apply + + value.requires_grad = grad_value + sampling_locations.requires_grad = grad_sampling_loc + attention_weights.requires_grad = grad_attn_weight + + gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) + + print(f'* {gradok} check_gradient_numerical(D={channels})') + + +if __name__ == '__main__': + check_forward_equal_with_pytorch_double() + check_forward_equal_with_pytorch_float() + + for channels in [30, 32, 64, 71, 1025, 2048, 3096]: + check_gradient_numerical(channels, True, True, True) + + + diff --git a/anet_clip/backup/pdvc/pdvc.py b/anet_clip/backup/pdvc/pdvc.py new file mode 100644 index 0000000000000000000000000000000000000000..c342477fb906acda08cf40a040eb45b2b9e901b8 --- /dev/null +++ b/anet_clip/backup/pdvc/pdvc.py @@ -0,0 +1,1303 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR(https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
+# ------------------------------------------------------------------------
+
+import json
+import torch
+import torch.nn.functional as F
+from torch import nn
+import math
+import time
+
+from misc.detr_utils import box_ops
+from misc.detr_utils.misc import (inverse_sigmoid)
+
+from .matcher import build_matcher
+
+from .deformable_transformer import build_deforamble_transformer
+from pdvc.CaptioningHead import build_captioner
+import copy
+from .criterion import AlignCriterion, SetCriterion, ContrastiveCriterion
+# from .rl_tool import init_scorer
+from misc.utils import decide_two_stage
+from .base_encoder import build_base_encoder
+# from .video_segmentation import segment_video_into_steps, alignment_to_boundary, to_center_duration, align_frame_into_steps
+from .video_segmentation import *
+# from transformers import AutoModel, BertConfig
+# from transformers.models.bert.modeling_bert import BertEncoder
+import numpy as np
+from itertools import chain
+# from .UniVL import load_pretrained_UniVL
+
+
+def _get_clones(module, N):
+    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+class PDVC(nn.Module):
+    """ This is the PDVC module that performs dense video captioning """
+
+    def __init__(self, base_encoder, transformer, captioner, num_classes, num_queries, num_feature_levels,
+                 aux_loss=True, with_box_refine=False, opt=None, translator=None):
+        """ Initializes the model.
+        Parameters:
+            transformer: torch module of the transformer architecture. See transformer.py
+            captioner: captioning head that generates a sentence for each event query
+            num_classes: number of foreground classes
+            num_queries: number of event queries. This is the maximal number of events
+                         PDVC can detect in a single video. For ActivityNet Captions, we recommend 10-30 queries.
+            aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
+ with_box_refine: iterative bounding box refinement + opt: all configs + """ + super().__init__() + self.opt = opt + self.base_encoder = base_encoder + self.transformer = transformer + self.caption_head = captioner + num_pred_text = 0 + + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # self.text_encoder = text_encoder + # text_encoder_hidden_dim = self.text_encoder.config.hidden_size + # num_pred_text += 1 + + hidden_dim = transformer.d_model + text_hidden_dim = opt.text_hidden_dim + + if self.opt.use_anchor: + # self.tgt_embed = nn.Embedding(num_queries, hidden_dim) + self.anchor_embed = nn.Embedding(num_queries, 2) # num_queries, 2 (center, duration) + self.query_embed = self.transformer.prepare_init_anchor_and_query(self.anchor_embed, hidden_dim, \ + random_anchor_init=True, prior_anchor_duration_init=True, \ + prior_duration=0.048) + self.query_embed = nn.Parameter(self.query_embed, requires_grad=True) + else: + self.query_embed = nn.Embedding(num_queries, hidden_dim * 2) + + self.class_head = nn.Linear(hidden_dim, num_classes) + self.class_refine_head = nn.Linear(hidden_dim, num_classes) # For refine pseudo box if use additional score layer + self.count_head = nn.Linear(hidden_dim, opt.max_eseq_length + 1) + self.bbox_head = MLP(hidden_dim, hidden_dim, 2, 3) + + self.num_feature_levels = num_feature_levels + self.aux_loss = aux_loss + self.with_box_refine = with_box_refine + self.share_caption_head = opt.share_caption_head + + # initialization + prior_prob = 0.01 + bias_value = -math.log((1 - prior_prob) / prior_prob) + self.class_head.bias.data = torch.ones(num_classes) * bias_value + self.class_refine_head.bias.data = torch.ones(num_classes) * bias_value + nn.init.constant_(self.bbox_head.layers[-1].weight.data, 0) + nn.init.constant_(self.bbox_head.layers[-1].bias.data, 0) + + if self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim' \ + or self.opt.use_pseudo_box: + self.load_text_embed = True + else: + self.load_text_embed = False + + + num_pred = transformer.decoder.num_layers + if self.share_caption_head: + print('all decoder layers share the same caption head') + self.caption_head = nn.ModuleList([self.caption_head for _ in range(num_pred)]) + else: + print('do NOT share the caption head') + self.caption_head = _get_clones(self.caption_head, num_pred) + + if self.opt.use_additional_cap_layer: + self.caption_head_refine = _get_clones(captioner, self.opt.refine_pseudo_stage_num) + + if with_box_refine: + self.class_head = _get_clones(self.class_head, num_pred) + self.count_head = _get_clones(self.count_head, num_pred) + self.bbox_head = _get_clones(self.bbox_head, num_pred) + nn.init.constant_(self.bbox_head[0].layers[-1].bias.data[1:], -2) + # hack implementation for iterative bounding box refinement + self.transformer.decoder.bbox_head = self.bbox_head + else: + nn.init.constant_(self.bbox_head.layers[-1].bias.data[1:], -2) + self.class_head = nn.ModuleList([self.class_head for _ in range(num_pred)]) + self.count_head = nn.ModuleList([self.count_head for _ in range(num_pred)]) + self.bbox_head = nn.ModuleList([self.bbox_head for _ in range(num_pred)]) + self.transformer.decoder.bbox_head = None + + self.class_refine_head = _get_clones(self.class_refine_head, self.opt.refine_pseudo_stage_num) + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + if opt.disable_contrastive_projection: + projection_event = nn.Identity() + projection_text = nn.Identity() + else: + projection_event = nn.Linear(hidden_dim, opt.contrastive_hidden_size) + projection_text = 
nn.Linear(text_hidden_dim, opt.contrastive_hidden_size) + self.contrastive_projection_event = nn.ModuleList( + [projection_event for _ in range(num_pred)]) + self.contrastive_projection_text = nn.ModuleList( + [projection_text for _ in range(num_pred)]) + if opt.enable_bg_for_cl: + self.background_embed = nn.Parameter(torch.randn(1, opt.contrastive_hidden_size), requires_grad=True) + else: + self.background_embed = None + + + self.translator = translator + + self.disable_mid_caption_heads = opt.disable_mid_caption_heads + if self.disable_mid_caption_heads: + print('only calculate caption loss in the last decoding layer') + + self.pseudo_boxes = {} + + + def get_filter_rule_for_encoder(self): + filter_rule = lambda x: 'input_proj' in x \ + or 'transformer.encoder' in x \ + or 'transformer.level_embed' in x \ + or 'base_encoder' in x + return filter_rule + + def encoder_decoder_parameters(self): + filter_rule = self.get_filter_rule_for_encoder() + enc_paras = [] + dec_paras = [] + for name, para in self.named_parameters(): + if filter_rule(name): + print('enc: {}'.format(name)) + enc_paras.append(para) + else: + print('dec: {}'.format(name)) + dec_paras.append(para) + return enc_paras, dec_paras + + # def text_encoding(self, text_encoder_input): + # ''' + # Produce the text embedding for each caption + # :param text_encoder_input: a dict of input for text encoder + # ''' + # if self.opt.pretrained_language_model == 'UniVL' or self.opt.use_pseudo_box: + # # breakpoint() + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input, output_all_encoded_layers=True)[0][-1] + # text_embed = text_embed.to(dtype=dtype) # num_sentence, num_word, dim + # attention_mask = text_encoder_input['attention_mask'].unsqueeze(-1).to(dtype=dtype) # num_sentence, num_word, 1 + # attention_mask[:,0,:] = 0. 
# This operation follows from the UniVL + # text_embed = text_embed * attention_mask # num_sentence, num_word, dim + # text_embed = text_embed.sum(dim=1) / attention_mask.sum(dim=1) # num_sentence, dim + # raw_text_embed = text_embed + # # if video_name: + # # text_feature_path = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text' + # # np.save('{}/{}.npy'.format(text_feature_path, video_name), text_embed.detach().cpu().numpy()) + # text_embed = self.contrastive_projection_text[-1](text_embed) + + # else: + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input) + # text_embed = text_embed['pooler_output'].to(dtype=dtype) # num_sentence, dim + # text_embed = self.contrastive_projection_text[-1](text_embed) # num_sentence, dim_contrastive_learning + # # TODO: add more paradigm to generate the text_embedding + + # return text_embed, raw_text_embed + + def forward(self, dt, criterion, contrastive_criterion, eval_mode=False): + transformer_input_type = self.opt.transformer_input_type + vf = dt['video_tensor'] # (N, L, C) + mask = ~ dt['video_mask'] # (N, L) + duration = dt['video_length'][:, 1] + video_name = dt['video_key'][0][2:] + # text_encoder_input = dt['text_encoder_input'] if (self.opt.matcher_type=='DTW' or self.opt.use_pseudo_box) else None + N, L, C = vf.shape + # assert N == 1, "batch size must be 1."s + + srcs, masks, pos = self.base_encoder(vf, mask, duration) + + src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten = self.transformer.prepare_encoder_inputs( + srcs, masks, pos) + memory = self.transformer.forward_encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, + lvl_pos_embed_flatten, mask_flatten) + + two_stage, disable_iterative_refine, proposals, proposals_mask = decide_two_stage(transformer_input_type, + dt, criterion) + if two_stage: + if transformer_input_type == 'prior_proposals': + if self.opt.prior_manner == 'add': + #print('Insert the prior knowledge by adding the prior proposals to the query embed') + init_query_embed = self.query_embed.weight + _, tgt = torch.chunk(init_query_embed, 2, dim=1) + tgt = tgt.unsqueeze(0).expand(N, -1, -1) + init_reference, _, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_proposal( + proposals) + else: + if self.opt.use_anchor: + # tgt = self.tgt_embed.weight + anchor = self.anchor_embed.weight # num_queries, 2 + query_anchor = (self.query_embed, anchor) + proposals_mask = torch.ones(N, self.query_embed.shape[0], device=self.query_embed.device).bool() + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_anchor(memory, query_anchor) + else: + query_embed = self.query_embed.weight + proposals_mask = torch.ones(N, query_embed.shape[0], device=query_embed.device).bool() + 
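The non-two-stage branch above follows the DETR convention: a single `nn.Embedding` of width `2 * hidden_dim` holds both the positional query embedding and the decoder content (`tgt`), which `prepare_decoder_input_query` splits apart (the `prior_manner == 'add'` branch shows the same `torch.chunk` idiom). Below is a minimal sketch of that split with illustrative shapes only; the helper name and batch handling are assumptions, not the repository's actual code:

```python
import torch
import torch.nn as nn

def split_query_embedding(query_embed_weight: torch.Tensor, batch_size: int):
    """Split a [num_queries, 2*hidden_dim] table into (query_pos, tgt),
    each expanded to [batch_size, num_queries, hidden_dim]."""
    query_pos, tgt = torch.chunk(query_embed_weight, 2, dim=1)
    query_pos = query_pos.unsqueeze(0).expand(batch_size, -1, -1)
    tgt = tgt.unsqueeze(0).expand(batch_size, -1, -1)
    return query_pos, tgt

embed = nn.Embedding(100, 2 * 256)  # num_queries=100, hidden_dim=256
query_pos, tgt = split_query_embedding(embed.weight, batch_size=1)
assert query_pos.shape == tgt.shape == (1, 100, 256)
```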
init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_query(memory, + query_embed) + hs, inter_references = self.transformer.forward_decoder(tgt, reference_points, memory, temporal_shapes, + level_start_index, valid_ratios, query_embed, + mask_flatten, proposals_mask, disable_iterative_refine) + # hs: [num_decoder_layer, bs, num_query, feat_dim] + + # breakpoint() + # project to co-embedding space + if self.load_text_embed and eval_mode==False: + # text_embed, raw_text_embed = self.text_encoding(text_encoder_input) + # text_embed = [text_embed] * hs.shape[0] + # text_embed = torch.stack(text_embed, dim=0) + raw_text_embed = dt['cap_embed'] * hs.shape[0]# dt['caption_embedding'] returns a tuple(list) + # text_embed: [num_decoder_layer, num_sentence, contrastive_dim] + event_embed = torch.stack([self.contrastive_projection_event[i](hs_i) for i, hs_i in enumerate(hs)]) + text_embed = torch.stack([self.contrastive_projection_text[j](hs_j.cuda()) for j, hs_j in enumerate(raw_text_embed)]) + # breakpoint() + # event_embed: [num_decoder_layer, num_query, contrastive_dim] + else: + raw_text_embed = None + text_embed = None + event_embed = hs + # breakpoint() + if self.opt.use_pseudo_box and self.training: + # breakpoint() + # print('use pseudo box') + video_frame_num = dt['video_length'][:,0].cpu().numpy() # [feature_len, raw_video_len, video_len] + video_name = dt['video_key'][0] + if self.pseudo_boxes.get(video_name) is not None and 'box' in self.pseudo_boxes[video_name].keys() and 'loss' in self.pseudo_boxes[video_name].keys(): + # if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [self.pseudo_boxes[video_name]['box']] + + else: + if self.opt.pseudo_box_type == 'align': + video_step_segment = [segment_video_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + # elif self.opt.pseudo_box_type == 'similarity': + # video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + # breakpoint() + elif self.opt.pseudo_box_type == "similarity": + # breakpoint() + if self.opt.width_ratio < 0: + video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode) for i in range(N)] + else: + video_step_alignment = [align_frame_into_steps_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=False, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i 
in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v1': + video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v2': + video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'weight_sim': + if self.opt.width_ratio < 0: + video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + else: + # breakpoint() + video_step_alignment = [step_retrieval_weight_sim_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + + elif self.opt.pseudo_box_type == 'weight_index': + video_step_alignment = [step_retrieval_weight_index(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + elif self.opt.pseudo_box_type == 'modeframe': + video_step_alignment = [align_frame_into_steps_mode(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'uniform': + video_step_alignment = [uniform_box(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # breakpoint() + else: + raise NotImplementedError('pseudo_box_type {} is not implemented'.format(self.opt.pseudo_box_type)) + + + if self.opt.pseudo_box_type != 'align': + if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + # breakpoint() + video_step_alignment, loss_op = [out[0] for out in video_step_alignment], [out[1] for out in video_step_alignment] + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0], 'loss': loss_op[0].item()} + else: + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0]} + + if self.opt.pseudo_box_type != 'align': + bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + else: + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + bbox_alignment = to_center_duration(bbox_alignment) + + + for sample in range(len(dt['video_target'])): + dt['video_target'][sample]['boxes_pseudo'] = bbox_alignment[sample] + # dt['video_target'][sample]['boxes'] = bbox_alignment[sample] + # else: + # print('use gt box') + + #breakpoint() + others = {'memory': memory, + 'mask_flatten': mask_flatten, + 'spatial_shapes': temporal_shapes, + 'level_start_index': level_start_index, + 'valid_ratios': valid_ratios, + 'proposals_mask': proposals_mask, + 'text_embed': text_embed, + 'event_embed': event_embed} + # breakpoint() + if 
eval_mode or self.opt.caption_loss_coef == 0: + out, loss = self.parallel_prediction_full(dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + if self.opt.refine_pseudo_box and self.opt.use_pseudo_box: + # print('refine') + out, loss = self.parallel_prediction_refine_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + # print('no refine') + out, loss = self.parallel_prediction_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + return out, loss + + def predict_event_num(self, counter, hs_lid): + hs_lid_pool = torch.max(hs_lid, dim=1, keepdim=False)[0] # [bs, feat_dim] + outputs_class0 = counter(hs_lid_pool) + return outputs_class0 + + def parallel_prediction_full(self, dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + ''' + hs: [decoder_layer, bs, num_query, feat_dim] + init_reference: [bs, num_query, 1] + inter_references: [decoder_layer, bs, num_query, 2] + ''' + outputs_classes = [] + outputs_classes0 = [] + outputs_coords = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + num_pred = hs.shape[0] + #breakpoint() + for l_id in range(hs.shape[0]): + if l_id == 0: + reference = init_reference + else: + reference = inter_references[l_id - 1] # [decoder_layer, batch, query_num, ...] + hs_lid = hs[l_id] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + output_count = self.predict_event_num(self.count_head[l_id], hs_lid) + n_pred_sentence = output_count.argmax(dim=-1).clamp(min=1).item() + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 4] + + # if self.opt.disable_mid_caption_heads and (l_id != hs.shape[0] - 1): + if l_id != hs.shape[0] - 1: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, 'none') + else: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type) # Only output caption in the last decoding layer + + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if self.opt.matcher_type == 'DTW': + assert reference.shape[-1] == 2 and tmp.shape[-1] == 2 + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :2] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 2] + + outputs_classes.append(outputs_class) + outputs_classes0.append(output_count) + outputs_coords.append(outputs_coord) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + output_count = torch.stack(outputs_classes0) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + + all_out = {'pred_logits': outputs_class, + 'pred_count': output_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs} + out = {k: v[-1] for k, v in all_out.items()} + + if self.aux_loss: + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + + # loss, _, _ = criterion(out, 
dt['video_target'], others) + return out, [] + + def parallel_prediction_refine_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + + outputs_classes = [] + outputs_counts = [] + outputs_coords = [] + outputs_cap_costs = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + cl_match_mats = [] + + num_pred = hs.shape[0] + if self.opt.pseudo_box_aug: + assert self.opt.use_pseudo_box + num_sentence = dt['gt_boxes'].size(-2) + assert num_sentence == len(dt['cap_raw'][0]) + if self.opt.pseudo_box_aug_num * num_sentence > self.opt.num_queries: + aug_num = self.opt.num_queries // num_sentence + else: + aug_num = self.opt.pseudo_box_aug_num + if self.opt.refine_pseudo_box: + ori_dt_cap_tensor = copy.deepcopy(dt['cap_tensor']) + ori_dt_cap_mask = copy.deepcopy(dt['cap_mask']) + cap_dim = dt['cap_tensor'].shape[-1] #(num_sen, num_max_word) + dt['cap_tensor'] = dt['cap_tensor'].repeat(1, aug_num).reshape(-1, cap_dim) + dt['cap_mask'] = dt['cap_mask'].repeat(1, aug_num).reshape(-1, cap_dim) + + for l_id in range(num_pred): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[ + l_id - 1] # [decoder_layer, batch, query_num, ...] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + + outputs_class = torch.stack(outputs_classes) # [decoder_layer, 
bs, num_query, N_class] + outputs_count = torch.stack(outputs_counts) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + # outputs_cap_loss = torch.stack(outputs_cap_losses) + + all_out = { + 'pred_logits': outputs_class, + 'pred_count': outputs_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs, + 'cl_match_mats': cl_match_mats} + out = {k: v[-1] for k, v in all_out.items()} + + + # ============================= Refine pseudo box here ================================ + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + mil_dict = {} + bag_score_cache = [] + for stage in range(self.opt.refine_pseudo_stage_num): + # Decay augment ratio as the stage increases + aug_ratio = self.opt.pseudo_box_aug_ratio * (0.5 ** stage) + _, last_indices, aux_indices = criterion(out, dt['video_target'], others, aug_num, aug_ratio) + # Only use the last decoder layer output to conduct the pseudo box refinement + hs_lid = hs[-1] + reference = inter_references[-1] #[1, num_query, 2] + indices = last_indices[0] # [tensor(): num_matched_query ,tensor(): num_matched_cap] + query_indices = indices[0][0] # the indices of matched query is ordered + cap_indices = indices[0][1] # the indices of matched sentence is unordered + # breakpoint() + # num_sentence = cap_indices.size(0) // self.opt.pseudo_box_aug_num + cap_sort = torch.sort(cap_indices)[1] + reorder_query_indices = query_indices[cap_sort] + if self.opt.use_neg_pseudo_box: + neg_query_indices = [] + neg_cap_indices = torch.arange(0,cap_indices.size(0),aug_num).view(num_sentence,-1).repeat(1,self.opt.num_neg_box).view(-1) + for i in range(num_sentence): + # select some negetive indices from reordered query indices + candidates_r = (reorder_query_indices[(i+1)*aug_num:]) + candidates_l = (reorder_query_indices[:(i)*aug_num]) + if (candidates_r.size(0) > 0) and (candidates_l.size(0) > 0): + candidates = torch.cat((candidates_r, candidates_l)) + else: + candidates = candidates_r if candidates_r.size(0) > 0 else candidates_l + if candidates.size(0) == 0: + candidates = reorder_query_indices + if candidates.size(0) < self.opt.num_neg_box: + random_selected_indices = torch.randperm(candidates.size(0)) + padding_num = self.opt.num_neg_box - candidates.size(0) + random_selected_indices = torch.cat((random_selected_indices, random_selected_indices[:padding_num])) + else: + random_selected_indices = torch.randperm(reorder_query_indices.size(0)-aug_num)[:self.opt.num_neg_box] + neg_query_indices.append(candidates[random_selected_indices]) + neg_query_indices = torch.cat(neg_query_indices) + neg_indices = [(neg_query_indices, neg_cap_indices)] + # query_indices: ordered, cap_indices: unordered + # ++++++ <1>. 
Produce the instance score and classification score + if self.opt.use_additional_cap_layer: + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + if (stage > 0) and self.opt.use_neg_pseudo_box: + _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, neg_indices) + else: + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + if (stage > 0) and self.opt.use_neg_pseudo_box: + _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, neg_indices) + # breakpoint() + # sentence_cap_prob: the caption probility for each matched query torch.Size([num_matched_query]) + if self.opt.use_additional_score_layer: + query_ins_score = self.class_refine_head[stage](hs_lid)[:, query_indices, :] + else: + query_ins_score = outputs_classes[-1][:, query_indices, :] # [1, num_matched_query, 1] + query_pred_boxes = outputs_coord[-1][:, query_indices, :] # [1, num_matched_query, 2] + query_pred_boxes = query_pred_boxes[0,:,:][cap_sort].view(-1, 2) # [num_matched_query, 2] + # breakpoint() + try: + query_ins_score = query_ins_score[0,cap_sort,0].view(-1, aug_num) # [num_cap, num_aug] + except: + breakpoint() + if self.opt.norm_ins_score == 'softmax': + query_ins_score = torch.softmax(query_ins_score, dim=-1) + elif self.opt.norm_ins_score == 'sigmoid': + query_ins_score = query_ins_score.sigmoid() + else: + raise NotImplementedError + + # breakpoint() + # sentence_cap_score = cap_probs['cap_prob_train'] + temperature = 2 + sentence_cap_prob = sentence_cap_prob[cap_sort].view(-1, aug_num) # [num_cap, num_aug] + cap_len = torch.tensor([len(cap.split()) for cap in dt['cap_raw'][0]], device=sentence_cap_prob.device).unsqueeze(1) + sentence_cap_score = (sentence_cap_prob / cap_len) ** temperature + 1e-5 + + sentence_cap_score[torch.isinf(sentence_cap_score)] = 1e8 + + sentence_cap_score = sentence_cap_score.detach() + query_ins_score = query_ins_score.detach() + + # breakpoint() + query_score = sentence_cap_score + query_ins_score + # sentence_score = + # if (stage == 0) or (self.opt.focal_mil == False): + # sentence_cap_prob = torch.softmax(sentence_cap_prob, dim=-1) # Softmax over queries in the same bag + # else: + # sentence_cap_prob = sentence_cap_prob.sigmoid() + + # if self.opt.cap_prob_clip: + # query_score = sentence_cap_prob.detach() * query_ins_score # [num_cap, num_aug] + # else: + # query_score = sentence_cap_prob * query_ins_score # [num_cap, num_aug] + + # # ++++++ <2>. 
Calculate the MIL loss and Neg loss + bag_score = query_score.sum(dim=-1) # [num_cap] + bag_score = bag_score.clamp(0,1) + bag_score_cache.append(bag_score) + mil_weight = bag_score_cache[stage-1] if self.opt.weighted_mil_loss else torch.ones_like(bag_score).to(bag_score.device) + if stage > 0: + if self.opt.focal_mil: + focal_weight = (torch.ones_like(bag_score).to(bag_score.device) - bag_score).pow(2) + mil_loss = - focal_weight * (bag_score + 1e-6).log() + mil_loss = (mil_weight * mil_loss).mean() + else: + # breakpoint() + mil_loss = - (mil_weight * bag_score.log()).mean() + if self.opt.use_neg_pseudo_box: + neg_cap_prob = neg_cap_prob.sigmoid() + neg_loss = - ((neg_cap_prob).pow(2) * (1- neg_cap_prob).log()).view(num_sentence,-1).mean(dim=-1) + neg_loss = (mil_weight * neg_loss).mean() + mil_loss += neg_loss + else: + mil_loss = F.binary_cross_entropy(bag_score, torch.ones_like(bag_score).to(bag_score.device)) + if 'loss_mil' in mil_dict.keys(): + mil_dict['loss_mil'] += mil_loss + else: + mil_dict['loss_mil'] = mil_loss + # ++++++ <3>. Merge the pseudo box to generate new pseudo box + if self.opt.merge_criterion == 'cap_topk': + topk_pseudo_scores, topk_pseudo_indices = torch.topk(sentence_cap_score, k=self.opt.merge_k_boxes, dim=-1) # [num_caption, k] + elif self.opt.merge_criterion == 'ins_topk': + topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_ins_score, k=self.opt.merge_k_boxes, dim=-1) + elif self.opt.merge_criterion == 'ins_cap_topk': + topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_score, k=self.opt.merge_k_boxes, dim=-1) # [num_caption, k] + else: + raise NotImplementedError('merge_criterion {} is not implemented'.format(self.opt.merge_criterion)) + # breakpoint() + topk_pseudo_scores = topk_pseudo_scores / (topk_pseudo_scores.sum(dim=-1, keepdim=True) + 1e-6) # [num_caption, k] + weight = topk_pseudo_scores.unsqueeze(-1).repeat(1,1,2) # [num_caption, k, 2] + for i in range(len(dt['video_target'])): + previous_pseudo_box = dt['video_target'][i]['box_pseudo_aug'] #[num_caption*num_aug, 2] + if self.opt.use_query_box_for_refine: + # Use the coordinates of query as part of guidance for refinement + previous_pseudo_box = (previous_pseudo_box + query_pred_boxes) / 2 + if self.opt.merge_mode == 'weighted_sum': + # Merge top-k boxes with weighted sum + selected_pseudo_box = torch.gather(previous_pseudo_box.view(-1,aug_num,2), 1, \ + topk_pseudo_indices.unsqueeze(-1).expand(-1,-1,previous_pseudo_box.size(-1))) # [num_caption, k, 2] + refined_pseudo_box = (weight * selected_pseudo_box).sum(dim=1).clamp(0,1) # [num_caption, 2] + dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone() + # I met the following problem with ''targets_cp = copy.deepcopy(targets)'' in criterion.py: + # RuntimeError: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment + # When I tried to conduct the deepcopy operation with the targets which have been updated with 'boxes_pseudo' keys + # So I detach the refined_pseudo_box here to avoid the deepcopy operation here + # Commented by Huabin, 2023/9/14 + elif self.opt.merge_mode == 'interpolate': + # Generate new box with linear interpolation between previous pbox and pbox with max score + max_pseudo_scores = topk_pseudo_scores[:,:1] + max_coef = 0.5 * torch.ones_like(max_pseudo_scores).to(max_pseudo_scores.device) # Set a max coef for box interpolatation + max_pseudo_box = torch.gather(previous_pseudo_box.view(-1,aug_num,2), 1, \ + 
topk_pseudo_indices[:,:1].unsqueeze(-1).expand(-1,-1,previous_pseudo_box.size(-1)))
+                     interpolate_coef = torch.min(max_pseudo_scores, max_coef)
+                     refined_pseudo_box = (1 - interpolate_coef) * previous_pseudo_box[(aug_num-1)::aug_num, :] \
+                                          + interpolate_coef * max_pseudo_box.squeeze(1)
+                     refined_pseudo_box = refined_pseudo_box.clamp(0, 1)
+                     dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone()
+
+         # ++++++ <4>. End of the refinement: restore dt['cap_tensor'] and dt['cap_mask'] to their un-repeated originals
+         dt['cap_tensor'] = ori_dt_cap_tensor
+         dt['cap_mask'] = ori_dt_cap_mask
+         mil_dict['loss_mil'] = mil_dict['loss_mil'] / self.opt.refine_pseudo_stage_num
+         criterion.pseudo_box_aug = False
+         # ================== End of refinement ========================================
+         if self.aux_loss:
+             ks, vs = list(zip(*(all_out.items())))
+             out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)]
+             loss, last_indices, aux_indices = criterion(out, dt['video_target'], others)
+             if self.opt.disable_rematch:
+                 # Disable re-matching and directly use the indices with max score in the last stage of refinement
+                 selected_indices = query_score.argmax(dim=-1).unsqueeze(-1)
+                 query_indices_in_refine = reorder_query_indices.to(selected_indices.device).view(-1, aug_num)
+                 query_indices_in_refine = query_indices_in_refine.gather(1, selected_indices)
+                 query_indices_in_refine, index_sort = torch.sort(query_indices_in_refine, 0)
+                 cap_indices_in_refine = last_indices[0][0][1].sort()[0]
+                 last_indices = [[(query_indices_in_refine.view(-1), cap_indices_in_refine[index_sort.view(-1)])], last_indices[1]]
+             loss.update(mil_dict)
+             criterion.pseudo_box_aug = True
+             for l_id in range(hs.shape[0]):
+                 hs_lid = hs[l_id]
+                 reference = init_reference if l_id == 0 else inter_references[l_id - 1]
+                 indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0]
+                 cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference,
+                                                                                       others, self.opt.caption_decoder_type, indices)
+                 l_dict = {'loss_caption': cap_loss}
+                 if l_id != hs.shape[0] - 1:
+                     l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()}
+                 loss.update(l_dict)
+             out.update({'caption_probs': cap_probs, 'seq': seq})
+         else:
+             loss, last_indices = criterion(out, dt['video_target'], others)
+             criterion.pseudo_box_aug = True
+             l_id = hs.shape[0] - 1
+             reference = inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
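For reference, the `weighted_sum` merge in the refinement block above reduces to a score-weighted average of the top-k augmented boxes in (center, duration) space. A self-contained sketch under assumed shapes (`num_cap` captions with `aug_num` augmented boxes each; names are illustrative, not the repository's API):

```python
import torch

def merge_topk_boxes(scores: torch.Tensor, boxes: torch.Tensor, k: int) -> torch.Tensor:
    """scores: [num_cap, aug_num]; boxes: [num_cap, aug_num, 2] in (center, duration).
    Returns score-weighted merged boxes of shape [num_cap, 2]."""
    topk_scores, topk_idx = torch.topk(scores, k=k, dim=-1)                 # [num_cap, k]
    weights = topk_scores / (topk_scores.sum(dim=-1, keepdim=True) + 1e-6)  # normalize per caption
    selected = torch.gather(boxes, 1, topk_idx.unsqueeze(-1).expand(-1, -1, 2))  # [num_cap, k, 2]
    return (weights.unsqueeze(-1) * selected).sum(dim=1).clamp(0, 1)        # [num_cap, 2]

scores = torch.rand(4, 8)   # 4 captions, 8 augmented boxes each
boxes = torch.rand(4, 8, 2)
print(merge_topk_boxes(scores, boxes, k=3).shape)  # torch.Size([4, 2])
```

Since normalized weights keep the result inside the convex hull of the selected candidates, the final `clamp(0, 1)` acts only as a safety net, mirroring the clamp in the code above.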
+ hs_lid = hs[l_id] + indices = last_indices[0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + l_dict = {'loss_caption': cap_loss} + loss.update(l_dict) + + out.pop('caption_losses') + out.pop('caption_costs') + out.update({'caption_probs': cap_probs, 'seq': seq}) + + + return out, loss + + def parallel_prediction_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + + outputs_classes = [] + outputs_counts = [] + outputs_coords = [] + outputs_cap_costs = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + cl_match_mats = [] + + num_pred = hs.shape[0] + + if self.opt.pseudo_box_aug: + assert self.opt.use_pseudo_box + cap_dim = dt['cap_tensor'].shape[-1] # (num_sen, num_max_word) + dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + + for l_id in range(num_pred): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[ + l_id - 1] # [decoder_layer, batch, query_num, ...] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or not self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + 
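`ContrastiveCriterion.forward_logits`, used above to build `cl_match_mats`, is defined elsewhere in the repository. As a rough illustration only (not the repository's implementation; the temperature value and background handling are assumptions), a text-to-event match matrix of this kind can be computed as temperature-scaled cosine similarity with an optional background embedding appended:

```python
import torch
import torch.nn.functional as F

def match_matrix(text_embed, event_embed, bg_embed=None, temperature=0.1):
    """text_embed: [num_sentence, d]; event_embed: [num_query, d].
    Returns logits of shape [num_query, num_sentence (+1 if bg_embed)]."""
    if bg_embed is not None:
        text_embed = torch.cat([text_embed, bg_embed], dim=0)
    text = F.normalize(text_embed, dim=-1)
    event = F.normalize(event_embed, dim=-1)
    return (event @ text.t()) / temperature

logits = match_matrix(torch.randn(5, 128), torch.randn(100, 128),
                      bg_embed=torch.randn(1, 128))
print(logits.shape)  # torch.Size([100, 6])
```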
outputs_cap_seqs.append(seq)
+
+         outputs_class = torch.stack(outputs_classes)  # [decoder_layer, bs, num_query, N_class]
+         outputs_count = torch.stack(outputs_counts)
+         outputs_coord = torch.stack(outputs_coords)  # [decoder_layer, bs, num_query, 4]
+         # outputs_cap_loss = torch.stack(outputs_cap_losses)
+
+         all_out = {
+             'pred_logits': outputs_class,
+             'pred_count': outputs_count,
+             'pred_boxes': outputs_coord,
+             'caption_probs': outputs_cap_probs,
+             'seq': outputs_cap_seqs,
+             'cl_match_mats': cl_match_mats}
+         out = {k: v[-1] for k, v in all_out.items()}
+
+         if self.aux_loss:
+             ks, vs = list(zip(*(all_out.items())))
+             out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)]
+             if transformer_input_type == 'prior_proposals':
+                 loss, _, _ = criterion(out, dt['video_target'])
+                 # Randomly select one query from each segment interval
+                 num_sentence = dt['cap_tensor'].shape[0]
+                 num_query = hs.shape[-2]
+                 num_query_interval = num_query // num_sentence
+                 query_indices = []
+                 for i in range(num_sentence):
+                     interval_min = i * num_query_interval
+                     interval_max = interval_min + num_query_interval
+                     sample = torch.randint(interval_min, interval_max, (hs.shape[0],))
+                     query_indices.append(sample)
+                 query_indices = torch.cat(query_indices, dim=0)
+                 gt_indices = torch.arange(num_sentence)
+
+                 last_indices = ([(query_indices[::hs.shape[0]], gt_indices)], [None, None])
+                 aux_indices = []
+                 for l_id in range(hs.shape[0]-1):
+                     aux_indices.append(([(query_indices[(l_id+1)::hs.shape[0]], gt_indices)], [None, None]))
+             else:
+                 loss, last_indices, aux_indices = criterion(out, dt['video_target'], others)
+             for l_id in range(hs.shape[0]):
+                 hs_lid = hs[l_id]
+                 reference = init_reference if l_id == 0 else inter_references[l_id - 1]
+                 indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0]
+                 cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference,
+                                                                                       others, self.opt.caption_decoder_type, indices)
+
+                 l_dict = {'loss_caption': cap_loss}
+                 if (self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim'):
+                     contrastive_loss = contrastive_criterion(
+                         text_embed=others['text_embed'][l_id],
+                         event_embed=others['event_embed'][l_id],
+                         matching_indices=indices,
+                         bg_embed=self.background_embed,
+                     )
+
+                     l_dict.update({'contrastive_loss': contrastive_loss})
+                 if l_id != hs.shape[0] - 1:
+                     l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()}
+                 loss.update(l_dict)
+             out.update({'caption_probs': cap_probs, 'seq': seq})
+         else:
+             loss, last_indices = criterion(out, dt['video_target'], others)
+
+             l_id = hs.shape[0] - 1
+             reference = inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
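The `prior_proposals` branch above replaces Hungarian matching by pairing each sentence with one query drawn uniformly at random from its own contiguous interval of the query axis, independently per decoder layer. A standalone sketch of that sampling (function name and shapes are illustrative assumptions):

```python
import torch

def sample_queries_per_segment(num_queries: int, num_sentence: int, num_layers: int) -> torch.Tensor:
    """Split the query axis into num_sentence equal intervals and draw one
    query index per interval for each decoder layer.
    Returns indices of shape [num_sentence, num_layers]."""
    interval = num_queries // num_sentence
    samples = []
    for i in range(num_sentence):
        lo, hi = i * interval, (i + 1) * interval
        samples.append(torch.randint(lo, hi, (num_layers,)))
    return torch.stack(samples)

idx = sample_queries_per_segment(num_queries=100, num_sentence=5, num_layers=6)
print(idx.shape)  # torch.Size([5, 6])
```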
+ hs_lid = hs[l_id] + indices = last_indices[0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + l_dict = {'loss_caption': cap_loss} + loss.update(l_dict) + + out.pop('caption_losses') + out.pop('caption_costs') + out.update({'caption_probs': cap_probs, 'seq': seq}) + + return out, loss + + def caption_prediction(self, cap_head, dt, hs, reference, others, captioner_type, indices=None): + N_, N_q, C = hs.shape + # all_cap_num = len(dt['cap_tensor']) + # if self.opt.pseudo_box_aug: + # assert self.opt.use_pseudo_box + # cap_dim = dt['cap_tensor'].shape[-1] # (num_sen, num_max_word) + # # breakpoint() + # if indices != None: + # breakpoint() + # dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + # dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + all_cap_num = len(dt['cap_tensor']) + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + if indices == None: + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx=row_idx + query_nums[i] + col_idx= col_idx + gt_nums[i] + + bigids = mix_mask.nonzero(as_tuple=False) + feat_bigids, cap_bigids = bigids[:, 0], bigids[:, 1] + else: + # breakpoint() + feat_bigids = torch.zeros(sum([len(_[0]) for _ in indices])).long() + cap_bigids = torch.zeros_like(feat_bigids) + total_query_ids = 0 + total_cap_ids = 0 + total_ids = 0 + max_pair_num = max([len(_[0]) for _ in indices]) + new_hr_for_dsa = torch.zeros(N_, max_pair_num, C) # only for lstm-dsa + cap_seq = dt['cap_tensor'] + new_seq_for_dsa = torch.zeros(N_, max_pair_num, cap_seq.shape[-1], dtype=cap_seq.dtype) # only for lstm-dsa + for i, index in enumerate(indices): + feat_ids, cap_ids = index + feat_bigids[total_ids: total_ids + len(feat_ids)] = total_query_ids + feat_ids + cap_bigids[total_ids: total_ids + len(feat_ids)] = total_cap_ids + cap_ids + new_hr_for_dsa[i, :len(feat_ids)] = hs[i, feat_ids] + new_seq_for_dsa[i, :len(feat_ids)] = cap_seq[total_cap_ids + cap_ids] + total_query_ids += query_nums[i] + total_cap_ids += gt_nums[i] + total_ids += len(feat_ids) + # if self.opt.pseudo_box_aug: + # # Revise the matched targer ids for pseudo box augmentation to caption id + # cap_bigids = cap_bigids // self.opt.pseudo_box_aug_num + cap_probs = {} + flag = True + + if captioner_type == 'none': + cost_caption = torch.zeros(N_, N_q, all_cap_num, + device=hs.device) # batch_size * num_queries * all_caption_num + loss_caption = torch.zeros(N_, N_q, all_cap_num, device=hs.device) + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cost_caption, loss_caption, cap_probs, seq + + elif captioner_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + elif self.opt.caption_decoder_type == 'standard': + # breakpoint() + # assert N_ == 1, 'only support batchsize = 1' + if self.training: + # breakpoint() + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + if 
self.opt.refine_pseudo_box: # Only training and refine_pseudo_box = True returns the raw_cap_prob + cap_prob, raw_cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # shape: [num_sentence, max_num_word, num_vocab] + # cap_prob is log_softmax(prob), raw_cap_prob is (prob) + cap_probs['cap_prob_train'] = cap_prob + cap_probs['raw_cap_prob'] = raw_cap_prob + else: + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608], here 13 is the max length among 5 sentences + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, + dt['cap_tensor'][cap_bigids]) + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + flag = False + pass + + if flag: + clip_ext = clip[feat_bigids] + clip_mask_ext = clip_mask[feat_bigids] + + if self.training: + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq) + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + seq_gt = dt['cap_tensor'][cap_bigids] + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq_gt) + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + + if len(seq): + # re_seq = torch.zeros(N_, N_q, seq.shape[-1]) + # re_cap_prob_eval = torch.zeros(N_, N_q, cap_prob_eval.shape[-1]) + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + if self.opt.caption_cost_type == 'loss': + cap_prob = cap_prob.reshape(-1, cap_prob.shape[-2], cap_prob.shape[-1]) # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608] + caption_tensor = dt['cap_tensor'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + caption_mask = dt['cap_mask'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + cap_loss = cap_head.build_loss(cap_prob, caption_tensor, caption_mask) # [num_query] + cap_cost = cap_loss + else: + raise AssertionError('caption cost type error') + + # Calculate caption probs for each query + # breakpoint() + # if self.opt.refine_pseudo_box: + # sentence_cap_prob = cap_head.build_prob(raw_cap_prob, caption_tensor, caption_mask) + # else: + sentence_cap_prob = - cap_loss + + if indices: + return cap_loss.mean(), cap_probs, seq, sentence_cap_prob + # cap_loss.mean(): [num_matched_query] --> [1], + # cap_probs: dict, contains 'cap_prob_train' or 'cap_prob_eval' [num_matched_query, max_length_sentence, num_word_in_vocab] + # seq: [num_sentence, max_length_sentence+1], here the '+1' means the 1st col is all '0' + + cap_id, query_id = cap_bigids, feat_bigids + cost_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + cost_caption[query_id, cap_id] = cap_cost + loss_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + loss_caption[query_id, cap_id] = cap_loss + cost_caption = cost_caption.reshape(-1, N_q, + max(cap_id) + 1) # batch_size * num_queries * all_caption_num + loss_caption = loss_caption.reshape(-1, N_q, max(cap_id) + 1) + return cost_caption, loss_caption, cap_probs, seq + + def caption_prediction_eval(self, cap_head, dt, hs, reference, others, 
decoder_type, pred_num=None, indices=None): + assert indices == None + N_, N_q, C = hs.shape + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx = row_idx + query_nums[i] + col_idx = col_idx + gt_nums[i] + + cap_probs = {} + + if decoder_type in ['none']: + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cap_probs, seq + + elif decoder_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + elif decoder_type in ['standard']: + assert N_ == 1, 'only support batchsize = 1' + with torch.no_grad(): + if self.opt.transformer_input_type == 'prior_proposals': + # hs: [bs, num_query, feat_dim] + # reference: [bs, num_query, 2] + if pred_num: + num_cap = pred_num + else: + num_cap = dt['cap_tensor'].shape[0] + interval = N_q // num_cap + pool_layer = torch.nn.AvgPool1d(interval,stride=interval) + hs = pool_layer(hs.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # [batch, num_sentence, dim] + reference = pool_layer(reference.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # # [batch, num_sentence, 2] + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, num_cap, seq.shape[-1]) # + cap_prob_eval = cap_prob_eval.reshape(-1, num_cap, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + else: + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) # + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + return cap_probs, seq + + +class PostProcess(nn.Module): + """ This module converts the model's output into the format expected by the coco api""" + + def __init__(self, opt): + super().__init__() + self.opt = opt + + @torch.no_grad() + def forward(self, outputs, target_sizes, loader): + """ Perform the computation + Parameters: + outputs: raw outputs of the model + target_sizes: tensor of dimension [batch_size] containing the size of each video of the batch + """ + out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes'] + N, N_q, N_class = out_logits.shape + assert len(out_logits) == len(target_sizes) + prob = out_logits.sigmoid() # batch, num_queries, 1 + + if self.opt.transformer_input_type == 'prior_proposals': + #topk_values = prob.view(N, N_q) + #topk_indexes = torch.arange(N_q, device=prob.device).unsqueeze(0).repeat(N, 1) + topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), outputs['seq'].shape[1], dim=1) + else: + topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), N_q, dim=1) + scores = topk_values + # topk_boxes = topk_indexes // out_logits.shape[2] + topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode='floor') + labels = 
topk_indexes % out_logits.shape[2]
+         boxes = box_ops.box_cl_to_xy(out_bbox)
+         raw_boxes = copy.deepcopy(boxes)
+         boxes[boxes < 0] = 0
+         boxes[boxes > 1] = 1
+         boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 2))
+
+         scale_fct = torch.stack([target_sizes, target_sizes], dim=1)
+         boxes = boxes * scale_fct[:, None, :]
+         seq = outputs['seq']  # [batch_size, num_queries, max_Cap_len=30]
+         cap_prob = outputs['caption_probs']['cap_prob_eval']  # [batch_size, num_queries]
+         eseq_lens = outputs['pred_count'].argmax(dim=-1).clamp(min=1)
+
+         if len(seq):
+             mask = (seq > 0).float()
+             # cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float') / (
+             #     1e-5 + mask.sum(2).cpu().numpy().astype('float'))
+             cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float')
+             seq = seq.detach().cpu().numpy().astype('int')  # (eseq_batch_size, eseq_len, cap_len)
+             caps = [[loader.dataset.translator.rtranslate(s) for s in s_vid] for s_vid in seq]
+             if self.opt.transformer_input_type != 'prior_proposals':
+                 # Re-arrange the caption order according to the logits
+                 caps = [[caps[batch][idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+                 cap_scores = [[cap_scores[batch, idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+         else:
+             bs, num_queries = boxes.shape[:2]
+             cap_scores = [[-1e5] * num_queries] * bs
+             caps = [[''] * num_queries] * bs
+
+         results = [
+             {'scores': s, 'labels': l, 'boxes': b, 'raw_boxes': rb, 'captions': c, 'caption_scores': cs, 'query_id': qid,
+              'vid_duration': ts, 'pred_seq_len': sl} for s, l, b, rb, c, cs, qid, ts, sl in
+             zip(scores, labels, boxes, raw_boxes, caps, cap_scores, topk_boxes, target_sizes, eseq_lens)]
+         return results
+
+
+ class MLP(nn.Module):
+     """ Very simple multi-layer perceptron (also called FFN)"""
+
+     def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+         super().__init__()
+         self.num_layers = num_layers
+         h = [hidden_dim] * (num_layers - 1)
+         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+
+     def forward(self, x):
+         for i, layer in enumerate(self.layers):
+             x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
+         return x
+
+
+ def build(args):
+     device = torch.device(args.device)
+     base_encoder = build_base_encoder(args)
+     # For text encoder when using DTW matcher
+     # if args.matcher_type == 'DTW' or args.use_pseudo_box:
+     #     if args.pretrained_language_model == 'UniVL':
+     #         print('Load pretrained UniVL model weights')
+     #         text_encoder = load_pretrained_UniVL()
+     #     else:
+     #         for i in range(10):
+     #             try:
+     #                 text_encoder = AutoModel.from_pretrained(args.pretrained_language_model, cache_dir=args.huggingface_cache_dir)
+     #                 break
+     #             except:
+     #                 print('download error in AutoModel, retry...')
+     #                 time.sleep(1)
+     # else:
+     #     text_encoder = None
+
+     transformer = build_deforamble_transformer(args)
+     captioner = build_captioner(args)
+
+     model = PDVC(
+         base_encoder,
+         transformer,
+         captioner,
+         num_classes=args.num_classes,
+         num_queries=args.num_queries,
+         num_feature_levels=args.num_feature_levels,
+         aux_loss=args.aux_loss,
+         with_box_refine=args.with_box_refine,
+         opt=args
+     )
+
+     matcher = build_matcher(args)
+     if args.matcher_type == 'DTW' and args.use_anchor:
+         weight_dict = {'loss_ce': args.cls_loss_coef,
+                        'loss_bbox': args.bbox_loss_coef,
+                        'loss_giou': args.giou_loss_coef,
+                        'loss_self_iou': args.self_iou_loss_coef,
+                        'loss_ref_rank': args.ref_rank_loss_coef,
+                        'loss_counter': args.count_loss_coef,
+                        'loss_caption':
args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + else: + weight_dict = {'loss_ce': args.cls_loss_coef, + 'loss_bbox': args.bbox_loss_coef, + 'loss_giou': args.giou_loss_coef, + 'loss_counter': args.count_loss_coef, + 'loss_caption': args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + if args.refine_pseudo_box: + weight_dict.update({'loss_mil': args.mil_loss_coef}) + # TODO this is a hack + if args.aux_loss: + aux_weight_dict = {} + for i in range(args.dec_layers - 1): + aux_weight_dict.update({k + f'_{i}': v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + + losses = ['labels', 'boxes', 'cardinality'] + + if args.matcher_type == 'DTW' or args.matcher_type == 'Sim': + criterion = AlignCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = ContrastiveCriterion(temperature=args.contrastive_loss_temperature, + enable_cross_video_cl=args.enable_cross_video_cl, + enable_e2t_cl = args.enable_e2t_cl, + enable_bg_for_cl = args.enable_bg_for_cl) + contrastive_criterion.to(device) + else: + criterion = SetCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = None + + criterion.to(device) + postprocessors = {'bbox': PostProcess(args)} + + return model, criterion, contrastive_criterion, postprocessors + + diff --git a/anet_clip/backup/pdvc/position_encoding.py b/anet_clip/backup/pdvc/position_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb71befd6e4397bd4d5a30c7a43861cea158cc7 --- /dev/null +++ b/anet_clip/backup/pdvc/position_encoding.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Various positional encodings for the transformer. +""" +import math +import torch +from torch import nn + +from misc.detr_utils.misc import NestedTensor + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. 
+ """ + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + self.max_duration = 256 + self.duration_embed_layer = nn.Linear(self.max_duration, self.max_duration) + + def forward(self, tensor_list: NestedTensor): + x = tensor_list.tensors + mask = tensor_list.mask + duration = tensor_list.duration + assert mask is not None + not_mask = ~mask + x_embed = not_mask.cumsum(1, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + x_embed = (x_embed - 0.5) / (x_embed[:, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + # dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + dim_t = self.temperature ** (2 * (torch.div(dim_t, 2, rounding_mode='floor')) / self.num_pos_feats) + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + + dur_embed = self.duration_embedding(duration).reshape(-1,1,self.max_duration).expand_as(pos_x) + pos = torch.cat((pos_x, dur_embed), dim=2).permute(0, 2, 1) + return pos + + def duration_embedding(self, durations): + out = torch.zeros(len(durations), self.max_duration, device=durations.device) + durations = durations.int() + for ii in range(len(durations)): + out[ii, :durations[ii]] = 1 + out = self.duration_embed_layer(out) + return out + + + +def build_position_encoding(position_embedding, N_steps): + if position_embedding in ('v2', 'sine'): + # TODO find a better way of exposing other arguments + position_embedding = PositionEmbeddingSine(N_steps, normalize=True) + else: + raise ValueError(f"not supported {position_embedding}") + + return position_embedding diff --git a/anet_clip/backup/pdvc/util.py b/anet_clip/backup/pdvc/util.py new file mode 100644 index 0000000000000000000000000000000000000000..7e489c1bce356a96116e2c13fcabc1c84d132711 --- /dev/null +++ b/anet_clip/backup/pdvc/util.py @@ -0,0 +1,72 @@ +import torch +import numpy as np + +# def find_center_index(array: np.ndarray) -> np.ndarray: +# """ +# Given a array with shape [steps, topk], find the center index between topk indexes +# which has the minimal average distance with other indexes. + +# Args: +# - array: numpy array representing the input array with shape [steps, topk] + +# Returns: +# - center_indexes: numpy array of center indexes for each step +# """ + +# distances = np.sum(np.abs(array[:, np.newaxis, :] - array[:, :, np.newaxis]), axis=2) +# center_indexes = np.argmin(distances, axis=1) + +# return center_indexes + +def find_center_value(arr): + # Compute pairwise distances between all values + distances = np.abs(arr[:, np.newaxis] - arr[np.newaxis, :]) + + # Sum distances for each value + sum_distances = np.sum(distances, axis=1) + + # Find the index of the value with the smallest sum distance + center_index = np.argmin(sum_distances) + + # Get the center value + center_value = arr[center_index] + + return center_value + + +def compute_overlap(center_t, boundary_t, center_t_minus_1, boundary_t_minus_1): + """ + Compute the overlap of boundaries between time t and t-1 for each element in the arrays. 
+ + Args: + - center_t: numpy array representing the center at time t with shape [N,] + - boundary_t: numpy array representing the boundary at time t with shape [N,1, candidates] + - center_t_minus_1: numpy array representing the center at time t-1 with shape [N,] + - boundary_t_minus_1: numpy array representing the boundary at time t-1 with shape [N,] + + Returns: + - overlap: numpy array representing the overlap of boundaries with shape [N,] + """ + + boundary_t = boundary_t.squeeze(1) + boundary_t_minus_1 = boundary_t_minus_1.squeeze(1) + center_t = center_t[:, np.newaxis] + # breakpoint() + center_t_minus_1 = center_t_minus_1[:, np.newaxis] + # boundary_t_minus_1 = boundary_t_minus_1[:, np.newaxis] + + + # Calculate the start and end positions of the boundaries at time t and t-1 + start_t = center_t - 0.5 * boundary_t + end_t = center_t + 0.5 * boundary_t + start_t_minus_1 = center_t_minus_1 - 0.5 * boundary_t_minus_1 + end_t_minus_1 = center_t_minus_1 + 0.5 * boundary_t_minus_1 + + # Calculate the intersection and union of the boundaries + intersection = np.maximum(0, np.minimum(end_t, end_t_minus_1) - np.maximum(start_t, start_t_minus_1)) + union = boundary_t + boundary_t_minus_1 - intersection + + # Compute the overlap using the Intersection over Union (IoU) formula + overlap = intersection / union + + return overlap \ No newline at end of file diff --git a/anet_clip/backup/pdvc/video_segmentation.py b/anet_clip/backup/pdvc/video_segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..93775df585e53204022fceb86c693516386b6023 --- /dev/null +++ b/anet_clip/backup/pdvc/video_segmentation.py @@ -0,0 +1,917 @@ +import torch +import numpy as np + + +from pdvc.dp.exact_dp import drop_dtw, double_drop_dtw +from pdvc.dp.dp_utils import compute_sim +import statistics +from sklearn.cluster import KMeans +from pdvc.util import find_center_value, compute_overlap +# from config import CONFIG + +''' configs of original file ''' +config_eval_l2norm = True +config_eval_keep_percentile = 0.48 +config_eval_fixed_drop_sim = -1 + + +''' +return value: +frame features: [num_frames, feature_dim] -> optimal_assignment: [num_steps], -1 means no match, otherwise means the index of the matched step/caption/query + +''' +# filter_threshold = 0.5 + +def clip_array(arr, threshold): + clipped_arr = np.where(arr > threshold, arr, threshold) + return clipped_arr + + +# def compute_filtered_indices(topk_indices_list, topk_values_list, scale=0.5): +# # center_indices = [] +# # boundary_widths = [] +# filtered_indices_list = [] +# for topk_indices, topk_values in zip(topk_indices_list, topk_values_list): +# center_index = find_center_value(topk_indices) +# std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / sum(topk_values)) ** 0.5 +# boundary_width = std_index * scale +# filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width] +# filtered_indices_list.append(filtered_indices) +# # center_indices.append(center_index) +# # boundary_widths.append(boundary_width) + +# return filtered_indices_list + +def compute_filtered_indices(topk_indices, topk_values, threshold=0.5): + center_index = find_center_value(np.array(topk_indices)) + std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / (sum(topk_values) + 1e-5)) ** 0.5 + boundary_width = std_index * threshold + filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width] + return filtered_indices + +def compute_bbox_loss(index_list, box, similarity_values): 
+def compute_bbox_loss(index_list, box, similarity_values):
+    left, right = box
+    distances = []
+
+    for i, index in enumerate(index_list):
+        if left <= index <= right:
+            distance = -min(index - left, right - index)
+        else:
+            distance = max(left - index, index - right)
+
+        weighted_distance = similarity_values[i] * distance
+        distances.append(weighted_distance)
+
+    return sum(distances)
+
+
+def remove_outliers(indices, threshold, mode, w):
+    # Compute the mean (always needed for the standard deviation below) and the center of the
+    # indices, which is either their median or their windowed mode
+    mean = sum(indices) / len(indices)
+    if mode == 'median':
+        median = statistics.median(indices)
+    elif mode == 'mode':
+        count_dict = {}
+        for p in range(min(indices), max(indices) + 1):
+            # print(p)
+            count = sum(1 for c in indices if p - w <= c <= p + w)
+            count_dict[p] = count
+
+        max_count = max(count_dict.values())
+        best_p_values = [p for p, count in count_dict.items() if count == max_count]
+        if len(best_p_values) % 2 == 0:
+            best_p_values.pop()
+
+        mode_value = statistics.median(best_p_values)
+    std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5
+
+    # Calculate the threshold for identifying outliers
+    threshold_value = threshold * std_dev
+
+    # Filter out indices that are far from the center
+    # breakpoint()
+    if mode == 'median':
+        filtered_indices = [i for i in indices if abs(i - median) <= threshold_value]
+    elif mode == 'mode':
+        filtered_indices = [i for i in indices if abs(i - mode_value) <= threshold_value]
+    return filtered_indices
+
+
+def remove_outliers_v1(indices, threshold):
+    pass
+
+def get_mode(indices, w):
+    count_dict = {}
+    for p in range(min(indices), max(indices) + 1):
+        # print(p)
+        count = sum(1 for c in indices if p - w <= c <= p + w)
+        count_dict[p] = count
+
+    max_count = max(count_dict.values())
+    best_p_values = [p for p, count in count_dict.items() if count == max_count]
+    if len(best_p_values) % 2 == 0:
+        best_p_values.pop()
+
+    mode_value = statistics.median(best_p_values)
+    return mode_value
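get_mode implements a windowed mode: every candidate position is scored by how many of the indices fall within ±w of it, and ties are resolved by taking the median of the best-scoring positions. A quick self-contained check of that rule (it mirrors the logic above rather than importing it):

from statistics import median

# Windowed mode, mirroring get_mode(): score each position p by how many
# indices land inside [p - w, p + w]; break ties with the median position.
def windowed_mode(indices, w):
    scores = {p: sum(1 for c in indices if p - w <= c <= p + w)
              for p in range(min(indices), max(indices) + 1)}
    best = max(scores.values())
    best_p = [p for p, s in scores.items() if s == best]
    if len(best_p) % 2 == 0:   # keep an odd count so the median is an integer position
        best_p.pop()
    return median(best_p)

print(windowed_mode([3, 4, 5, 20], w=1))  # -> 4: the densest window covers 3..5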
+def get_mode_box(sim, topk, w, ratio):  # topk is typically set to 20, ratio to 1
+    ''' Note: the top-k frames are used when estimating the center because the top-k are the most
+    reliable; once the center is fixed, finding the boundary requires looking at all candidate frames. '''
+    avg_caption_length = sim.shape[1] // sim.shape[0]
+    sorted_idx = torch.argsort(-sim, dim=1)
+    top_indices = sorted_idx[:, :topk]
+    # top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True)
+    # top_indices_half = top_indices[:, :topk//2]
+    top_cap_indices = sorted_idx[:, :avg_caption_length]
+    # sorted_idx = torch.argsort(-sim, dim=1)
+    width = int(ratio * avg_caption_length / 2)  # ratio is typically set to 1
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        # index_list = top_indices[i].tolist()
+        mode_value = get_mode(top_indices[i].tolist(), w)
+        filtered_indices = [idx for idx in top_cap_indices[i].tolist() if abs(idx - mode_value) <= width]
+
+        # if len(filtered_indices) == 0:
+        #     filtered_indices = remove_outliers(sim[i].tolist(), top_indices[i].tolist(), 0.5, mode='median', w=w)
+        #     if len(filtered_indices) == 0:
+        #         bbox.append([0, sim.shape[1] - 1])
+        #         continue
+        if len(filtered_indices) == 0:
+            bbox.append([mode_value - width, mode_value + width])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+def compute_threshold(data, threshold):
+    mean = sum(data) / len(data)
+    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
+    threshold_value = threshold * std_dev
+    return threshold_value
+
+
+# using the similarity as the weight to find the center
+''' Find the center globally, then find the boundary locally:
+    1. find center: take the maximum of the window-summed similarity;
+    2. find boundary: keep the top-k frames that stay within a threshold of that center. '''
+def step_retrieval_weight_sim(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # similarity summed along a sliding window
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+        flag = 1
+    else:
+        flag = 0
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+    # breakpoint()
+
+    # Find the window with the maximum similarity sum for each step
+    _, step_center_frames = window_sums.max(dim=1)
+    step_center_frames = step_center_frames.squeeze()
+
+    if flag == 1:
+        step_center_frames = step_center_frames.unsqueeze(0).tolist()
+    else:
+        step_center_frames = step_center_frames.tolist()
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        threshold_value = compute_threshold(top_indices[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            bbox.append([step_center_frames[i] - w, step_center_frames[i] + w])
+        else:
+            bbox.append([w + min(filtered_indices), w + max(filtered_indices)])
+
+    return bbox
+
+''' TODO: get the right weight using the index '''
+def step_retrieval_weight_index(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_idx = torch.argsort(-similarity_matrix, dim=1)
+    # similarity summed along a sliding window
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+    # breakpoint()
+
+    # Find the window with the maximum similarity sum for each step
+    # NOTE: unlike step_retrieval_weight_sim above, the single-step case is not handled here
+    _, step_center_frames = window_sums.max(dim=1)
+    step_center_frames = step_center_frames.squeeze().tolist()
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        threshold_value = compute_threshold(top_indices[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value]
+        bbox.append([w + min(filtered_indices), w + max(filtered_indices)])
+
+    return bbox
+
+def uniform_box(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    return uniform_boxes
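The window-sum trick above is a length-(2w+1) box filter over the per-frame similarities, implemented with conv1d. A minimal sketch of it, with toy shapes of my own choosing: a "valid" convolution shortens each row by 2w frames, which is why the functions above add w back when converting window positions to frame indices.

import torch
import torch.nn.functional as F

sim = torch.rand(4, 50)                       # [num_steps, num_frames]
w = 2
kernel = torch.ones(1, 1, 2 * w + 1)          # box filter of width 2w+1
window_sums = F.conv1d(sim.unsqueeze(1), kernel).squeeze(1)  # [4, 50 - 2w]
centers = window_sums.argmax(dim=1) + w       # shift back to frame coordinates
print(window_sums.shape, centers)             # torch.Size([4, 46]), 4 center frames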
+
+def align_frame_into_steps(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    average_width = int(sim.shape[1] // sim.shape[0] / 2)
+    # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # bbox = get_mode_box(sim, topk, w, ratio)
+
+    top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True)
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        filtered_indices = remove_outliers(top_indices[i].tolist(), threshold, mode=mode, w=w)
+        if len(filtered_indices) < 2:
+            filtered_indices = remove_outliers(top_indices[i].tolist(), 2 * threshold, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            # fall back to an average-width window around the top-1 frame of this step
+            bbox.append([top_indices[i][0].item() - average_width, top_indices[i][0].item() + average_width])
+            continue
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+# use optimization to compute the pseudo boundary
+def align_frame_into_steps_op(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1):
+    # frame_features: torch.Size([200, 768])
+    augment_ratio_list = np.arange(0.5, 2, 0.1)
+
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    # breakpoint()
+    # [#step, #frame]
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy()
+
+    num_steps, num_frames = similarity_matrix.shape
+
+    # Select top-k frames for each caption [#step, #topk]
+    sorted_indices = np.argsort(similarity_matrix, axis=1)
+    # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:]
+    # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1)
+
+    # Compute center indexes [#step, 1]
+
+    # Update boundary width
+    initial_boundary_width = num_frames / num_steps
+    # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1)
+    # overlap = np.zeros(num_steps)
+
+    for i in range(num_iterations):
+        if i == 0 and not order:
+            boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1)
+            topk_indices = [index[-topk:] for index in sorted_indices]
+            topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)]
+
+            center_indexes = np.array([find_center_value(index) for index in topk_indices])
+            previous_index_center = None
+            # # overlap_weight = 0
+        else:
+            if i == 0:
+                segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int)
+                start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:]
+                start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames)
+                end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames)
+                boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1)
+
+                filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)]
+                if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4:
+                    boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1)
+                    topk_indices = [index[-topk:] for index in sorted_indices]
+                    topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)]
+
+                    center_indexes = np.array([find_center_value(index) for index in topk_indices])
+                    previous_index_center = None
+            else:
+                boundary_width_last = boundary_width.reshape(-1, 1, 1)
+                start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames)
+                end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames)
+
topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + +# use optimization to compute pseudo boundary +def align_frame_into_steps_op_v1(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1): + # frame_features: torch.Size([200, 768]) + augment_ratio_list = np.arange(0.5, 2, 0.1) + + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + # breakpoint() + # [#step, #frame] + similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + 
+ num_steps, num_frames = similarity_matrix.shape + + # Select top-k frames for each caption [#step, #topk] + sorted_indices = np.argsort(similarity_matrix, axis=1) + # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] + # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + + # Compute center indexes [#step, 1] + + + # Update boundary width + initial_boundary_width = num_frames / num_steps # 1 + # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 + # overlap = np.zeros(num_steps) + + for i in range(num_iterations): + if i == 0 and not order: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + # # overlap_weight = 0 + else: + if i == 0: + segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int) + start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:] + start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames) + boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1) + + filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)] + if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + else: + boundary_width_last = boundary_width.reshape(-1, 1, 1) + start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames) + + topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + weight_distance = [clip_array(index_distance[i], 0.5 * boundary_width_candidates[i]) for i in range(len(topk_indices))] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] / weight_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] 
* (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + + + + + +# # use optimization to compute pseudo boundary +# def align_frame_into_steps_op_order(frame_features, step_features, topk=15, threshold=0.5, num_iterations=4, beta=1): +# # frame_features: torch.Size([200, 768]) +# augment_ratio_list = np.arange(0.5, 2, 0.1) + +# if step_features.shape[0] == 0: +# return -np.ones(frame_features.shape[0]) + +# # breakpoint() +# # [#step, #frame] +# similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + +# num_steps, num_frames = similarity_matrix.shape + +# # Select top-k frames for each caption [#step, #topk] +# top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] +# top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + +# # Compute center indexes [#step, 1] +# center_indexes = find_center_index(top_indices)[:, np.newaxis] + +# # Update boundary width +# initial_boundary_width = num_frames / num_steps # 1 +# # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 +# # overlap = np.zeros(num_steps) + +# for i in range(num_iterations): +# if i == 0: +# boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) +# # previous_index_center = None +# # # overlap_weight = 0 +# else: +# boundary_width_last = boundary_width.reshape(-1, 1, 1) +# previous_index_center = center_indexes +# # overlap_weight = 0.5 * np.sum(overlap) + +# boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + +# index_distance = np.abs(top_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + +# 
loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates]
+
+#         if i == 0:
+#             loss = loss_sim  # [#step, #candidates]
+#             print('loss shape:', loss_sim.shape, loss.shape)
+#         else:
+#             # measure the overlap between boundaries given the center and boundary width
+#             overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last)  # [#step, #candidates]
+#             loss = loss_sim + beta * overlap
+#             print('loss shape:', loss_sim.shape, overlap.shape, loss.shape)
+#         # find the best boundary width
+#         # breakpoint()
+#         best_boundary_width = np.argmin(loss, axis=1)  # [#step]
+#         # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape)
+#         boundary_width = boundary_width_candidates[:,0][np.arange(num_steps), best_boundary_width]  # [#step]
+#         # print(boundary_width.shape)
+
+#     return center_indexes, boundary_width
+
+# based on the original version above, but changes how the center and the std are computed
+def align_frame_into_steps_op_order_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_index = torch.argsort(-sim, dim=1)
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])]
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+
+    iter_bbox_loss = {}
+    for iter in range(iteration):
+        if iter == 0:
+            refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+        else:
+            refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio)  # expand the bbox of the previous iteration
+
+        # global: from all frames, local: from the refined uniform boxes
+        top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+        top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])]
+
+        size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+        if sum(size_local) < (topk - 2) * len(size_local):
+            top_indices_list = top_indices_list_global
+            top_values_list = top_values_list_global
+        else:
+            top_indices_list = top_indices_list_local
+            top_values_list = top_values_list_local
+
+        # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+        bbox = []
+        for i in range(len(top_indices_list)):
+            filtered_indices = compute_filtered_indices(top_indices_list[i].tolist(), top_values_list[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                # fall back to the global top-k (note: the similarity values, not the indices, are the weights)
+                filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                bbox.append(uniform_boxes[i])
+                continue
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+        # compute the bbox loss
+        bbox_loss_list = [compute_bbox_loss(top_indices_list[i], bbox[i], top_values_list[i]) for i in range(len(top_indices_list))]
+        bbox_loss = sum(bbox_loss_list)
+        iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox}
+
+    # select the iteration with the minimum bbox loss and output its bbox
+    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
+    min_loss = iter_bbox_loss[min_loss_iter]['loss']
+    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']
+
+    return (best_bbox, min_loss)
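A small worked example of the bbox loss that drives the iteration selection above: frames inside the candidate box contribute a negative cost scaled by how deep inside they sit, frames outside contribute a positive cost, and each term is weighted by its similarity, so lower totals mean the box covers the high-similarity frames more tightly. The numbers here are made up for illustration.

# Worked example of the compute_bbox_loss arithmetic defined earlier in this file.
indices = [10, 12, 30]
box = (8, 15)
sims = [0.9, 0.8, 0.7]

total = 0.0
for idx, s in zip(indices, sims):
    left, right = box
    if left <= idx <= right:
        dist = -min(idx - left, right - idx)   # reward interior frames
    else:
        dist = max(left - idx, idx - right)    # penalize distance outside the box
    total += s * dist
print(total)   # 0.9*(-2) + 0.8*(-3) + 0.7*15 = 6.3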
+
+
+# pseudo box 4: based on a fixed window. The results were poor, so this variant was abandoned.
+def align_frame_into_steps_mode(frame_features, step_features, topk=15, w=2, ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    bbox = get_mode_box(sim, topk, w, ratio)
+    return bbox
+
+def uniform_window(frame_num, step_num):
+    uniform_timestamps = torch.linspace(0, frame_num, step_num + 1)
+    uniform_timestamps = torch.round(uniform_timestamps).int().tolist()
+    bbox = []
+    for i in range(step_num):
+        bbox.append([uniform_timestamps[i], uniform_timestamps[i + 1] - 1])
+
+    # window_size = frame_num // step_num
+    # bbox = []
+    # for i in range(step_num):
+    #     bbox.append([i * window_size, (i + 1) * window_size - 1])
+    # bbox[-1][1] = frame_num - 1
+    return bbox
+
+def expand_window(uniform_bbox, frame_num, step_num, ratio=1):
+    ''' ratio: how far the GT box is allowed to drift from the uniform box; anything beyond this
+    range is treated as impossible. The unit of ratio is one average caption length. '''
+    window_size = frame_num // step_num
+    refined_bbox = []
+    for bbox in uniform_bbox:
+        start = max(0, bbox[0] - ratio * window_size)
+        end = min(frame_num - 1, bbox[1] + ratio * window_size)
+        refined_bbox.append([start, end])
+    return refined_bbox
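Concretely, uniform_window splits the video into equal step-sized windows and expand_window pads each one by `ratio` average caption lengths on both sides, bounding where the true segment may drift. A quick demo with a 100-frame video and 4 steps (the helpers are restated here so the snippet runs on its own):

import torch

def uniform_window(frame_num, step_num):
    ts = torch.round(torch.linspace(0, frame_num, step_num + 1)).int().tolist()
    return [[ts[i], ts[i + 1] - 1] for i in range(step_num)]

def expand_window(boxes, frame_num, step_num, ratio=1):
    win = frame_num // step_num
    return [[max(0, s - ratio * win), min(frame_num - 1, e + ratio * win)]
            for s, e in boxes]

boxes = uniform_window(100, 4)       # [[0, 24], [25, 49], [50, 74], [75, 99]]
print(expand_window(boxes, 100, 4))  # [[0, 49], [0, 74], [25, 99], [50, 99]]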
+
+# pseudo box 3: based on similarity, taking the order of the steps into account
+def align_frame_into_steps_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    # old setting (the index is wrong)
+    # # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])]
+    # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list]
+    # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+
+    sorted_index = torch.argsort(-sim, dim=1)
+    # global: from all frames, local: from the refined uniform boxes
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk - 2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            bbox.append(uniform_boxes[i])
+            continue
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
+
+
+# based on pbox3: if ratio 1 yields enough local candidates, use them; otherwise progressively
+# widen the window, falling back to the global top-k if that still is not enough
+def align_frame_into_steps_order_adapt(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    # old setting (the index is wrong)
+    # # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])]
+    # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list]
+    # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+
+    sorted_index = torch.argsort(-sim, dim=1)
+    # global: from all frames, local: from the refined uniform boxes
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk - 1) * len(size_local):
+        flag = 0
+        for attempt in range(4):  # renamed from `i` to avoid shadowing the step index below
+            refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio + attempt * 0.5)
+            top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+            size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+            if sum(size_local) >= (topk - 1) * len(size_local):
+                flag = 1
+                break
+        if flag == 0:
+            top_indices_list = top_indices_list_global
+        else:
+            top_indices_list = top_indices_list_local
+
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            bbox.append(uniform_boxes[i])
+            continue
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
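The local-versus-global selection shared by the *_order variants above can be isolated into one helper: take the top-k frames restricted to each step's expanded window, and fall back to the unrestricted top-k when the windows are too tight to supply roughly k candidates per step. The sketch below mirrors that logic with names of my own; it is not imported from the repo.

import torch

def topk_local_or_global(sim, boxes, topk=15, slack=2):
    # sim: [num_steps, num_frames]; boxes: one [start, end] window per step
    order = torch.argsort(-sim, dim=1)
    glob = [order[i][:topk] for i in range(sim.shape[0])]
    loc = [order[i][(order[i] >= boxes[i][0]) & (order[i] <= boxes[i][1])][:topk]
           for i in range(sim.shape[0])]
    # keep the local candidates only if, on average, nearly k survive per step
    enough = sum(len(t) for t in loc) >= (topk - slack) * len(loc)
    return loc if enough else glob

sim = torch.rand(3, 60)
boxes = [[0, 19], [20, 39], [40, 59]]
print([t.shape for t in topk_local_or_global(sim, boxes, topk=15)])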
+
+def step_retrieval_weight_sim_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # breakpoint()
+
+    window_sums = torch.nn.functional.conv1d(sim.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+
+    sorted_index = torch.argsort(-window_sums, dim=1) + w
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk - 2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        threshold_value = compute_threshold(top_indices_list[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices_list[i].tolist() if abs(frame - top_indices_list[i][0]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            # fall back to a small window around the top-1 frame of this step
+            bbox.append([top_indices_list[i][0].item() - w, top_indices_list[i][0].item() + w])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
+
+# pseudo box 0: based on (drop-)DTW
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0]  # repeat to [1, N], then take row 0 -> shape [N]
+    # the baseline picks a mid-range value of the similarity matrix as the drop cost, i.e. the value
+    # at which matching a frame and dropping it are considered equally acceptable
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        # directly assign each frame to its best-matching step, which in principle gives a one-to-one matching
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0)
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        # tuning the drop cost adjusts how strict the matching is
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1
+    return optimal_assignment
+
+def align_query_into_steps(query_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(query_features.shape[0])
+
+    sim = compute_sim(step_features, query_features, config_eval_l2norm).cpu()
+    query_features, step_features = query_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0]  # repeat to [1, N], then take row 0 -> shape [N]
+    # the baseline picks a mid-range value of the similarity matrix as the drop cost, i.e. the value
+    # at which matching a query and dropping it are considered equally acceptable
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        # directly take the best match for each query, which in principle gives a one-to-one matching
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0)
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        # tuning the drop cost adjusts how strict the matching is
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, one_to_one=True, return_labels=True) - 1
+    return optimal_assignment
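The percentile-based drop cost used above is worth seeing in isolation: the k-th largest similarity, with k = keep_percentile * numel(sim), becomes the baseline logit, so frames whose similarity falls below it are cheaper to drop than to match. A minimal sketch with toy shapes (drop_dtw itself lives in pdvc.dp.exact_dp and is not reproduced here):

import torch

sim = torch.rand(8, 200)                 # [num_steps, num_frames]
keep_percentile = 0.48
k = max(1, int(sim.numel() * keep_percentile))
baseline_logit = torch.topk(sim.reshape(-1), k).values[-1]   # k-th largest similarity

zx_costs = -sim                                      # per (step, frame) matching costs
drop_costs = -baseline_logit.repeat(sim.shape[1])    # one drop cost per frame
print(float(baseline_logit), drop_costs.shape)       # roughly 0.52 here, torch.Size([200])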
+# at inference time: matching between the video and the predicted slots
+def segment_video_into_slots(video_features, pred_steps):
+    sim = compute_sim(pred_steps, video_features, l2_norm=config_eval_l2norm).detach()
+    if config_eval_fixed_drop_sim == -1:
+        k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+        baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    else:
+        baseline_logit = torch.tensor(config_eval_fixed_drop_sim)
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])  # making it of shape [1, N]
+    x_drop_costs = -baseline_logits.squeeze()
+    zx_costs = -sim
+
+    z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze()
+    zx_costs = zx_costs - z_drop_costs[0].reshape([1, 1])
+    z_drop_costs = z_drop_costs - z_drop_costs[0]
+    x_drop_costs = x_drop_costs - x_drop_costs[0]
+    segmentation = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy(), return_labels=True) - 1
+    return segmentation
+
+
+# get_index and alignment_to_boundary are used by the alignment-based approach
+def get_index(alignment):
+    start_idx, end_idx = [], []
+    for i in range(len(alignment)):
+        if alignment[i] == -1:
+            if i != 0 and alignment[i-1] != -1:
+                end_idx.append(i-1)
+            continue
+        if i == 0:
+            start_idx.append(i)
+        elif alignment[i] != alignment[i-1]:
+            start_idx.append(i)
+            if alignment[i-1] != -1:
+                end_idx.append(i-1)
+        if i == len(alignment) - 1:
+            end_idx.append(i)
+    assert len(start_idx) == len(end_idx)
+    for s, e in zip(start_idx, end_idx):
+        assert alignment[s] <= alignment[e]
+    return start_idx, end_idx
+
+def alignment_to_boundary(alignment, video_frame_num):
+    start_idx, end_idx = get_index(alignment)
+    # get_index returns plain lists, so convert to arrays to make the division elementwise
+    start_time = np.array(start_idx) / video_frame_num
+    end_time = np.array(end_idx) / video_frame_num
+    boundaries = list(zip(start_time, end_time))
+
+    return np.float32(np.stack(boundaries, axis=0))
+
+
+def to_center_duration(alignments):
+    new_alignments = []
+    for alignment in alignments:
+        start, end = alignment[:, 0], alignment[:, 1]
+        center = (start + end) / 2
+        duration = end - start
+        alignment[:, 0], alignment[:, 1] = center, duration
+        new_alignments.append(alignment)
+    return new_alignments
\ No newline at end of file
diff --git a/anet_clip/backup/pdvc/video_segmentation_ori.py b/anet_clip/backup/pdvc/video_segmentation_ori.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d06e59f3b5a80fb4e8a765d20287175b03568d4
--- /dev/null
+++ b/anet_clip/backup/pdvc/video_segmentation_ori.py
@@ -0,0 +1,127 @@
+import torch
+import numpy as np
+import statistics
+
+from pdvc.dp.exact_dp import drop_dtw
+from pdvc.dp.dp_utils import compute_sim
+from sklearn.cluster import KMeans
+
+
+config_eval_l2norm = True
+config_eval_keep_percentile = 0.48  # Calculated from the data
+config_eval_fixed_drop_sim = -1
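Before the alignment helpers below, a worked example of the convention they implement: a per-frame assignment (-1 marks a dropped frame) is converted into per-step [start, end] frame spans. The snippet re-derives the padded-difference trick of get_index_update, defined further down in this file, on a tiny input:

import numpy as np

# A per-frame assignment with three steps and some dropped (-1) frames.
alignment = np.array([-1, 0, 0, 1, -1, 2, 2])

padded = np.append(np.insert(alignment, 0, -1), -1)   # pad both ends with -1
start_idx = np.where(np.diff(padded) > 0)[0]          # label increases -> a segment starts

padded_end = padded.copy()
padded_end[padded_end == -1] = padded_end.max() + 1   # make drops "large" to catch segment ends
end_idx = np.where(np.diff(padded_end) > 0)[0] - 1

print(list(zip(start_idx, end_idx)))   # [(1, 2), (3, 3), (5, 6)]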
+
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, l2_norm=True).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0]  # repeat to [1, N], then take row 0 -> shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0)
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1
+    return optimal_assignment  # [num_frames]
+
+def get_index(alignment):
+    start_idx, end_idx = [], []
+    for i in range(len(alignment)):
+        if alignment[i] == -1:
+            if i != 0 and alignment[i-1] != -1:
+                end_idx.append(i-1)
+            continue
+        if i == 0:
+            start_idx.append(i)
+        elif alignment[i] != alignment[i-1]:
+            start_idx.append(i)
+            if alignment[i-1] != -1:
+                end_idx.append(i-1)
+        if i == len(alignment) - 1:
+            end_idx.append(i)
+    assert len(start_idx) == len(end_idx)
+    for s, e in zip(start_idx, end_idx):
+        assert alignment[s] <= alignment[e]
+    return start_idx, end_idx
+
+def get_index_update(alignment):
+    optimal_alignment = np.append(np.insert(alignment, 0, -1), -1)
+    diff_optimal_alignment = np.diff(optimal_alignment)
+
+    optimal_alignment_end = optimal_alignment.copy()
+    optimal_alignment_end[optimal_alignment_end == -1] = max(optimal_alignment_end) + 1
+    diff_optimal_alignment_end = np.diff(optimal_alignment_end)
+
+    start_idx = np.where(diff_optimal_alignment > 0)[0]
+    end_idx = np.where(diff_optimal_alignment_end > 0)[0] - 1
+    return start_idx, end_idx
+
+def alignment_to_boundary(alignment, video_frame_num):
+    start_idx, end_idx = get_index(alignment)
+    # get_index returns plain lists, so convert to arrays to make the division elementwise
+    start_time = np.array(start_idx) / video_frame_num
+    end_time = np.array(end_idx) / video_frame_num
+    boundaries = list(zip(start_time, end_time))
+
+    return np.float32(np.stack(boundaries, axis=0))
+
+
+def to_center_duration(alignments):
+    new_alignments = []
+    for alignment in alignments:
+        start, end = alignment[:, 0], alignment[:, 1]
+        center = (start + end) / 2
+        duration = end - start
+        alignment[:, 0], alignment[:, 1] = center, duration
+        new_alignments.append(alignment)
+    return new_alignments
+
+
+def remove_outliers(indices, threshold):
+    # Calculate the median, mean, and standard deviation of the indices
+    median = statistics.median(indices)
+    mean = sum(indices) / len(indices)
+    std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5
+
+    # Calculate the threshold for identifying outliers
+    threshold_value = threshold * std_dev
+
+    # Filter out indices that are far from the median
+    filtered_indices = [i for i in indices if abs(i - median) <= threshold_value]
+
+    return filtered_indices
+
+
+def align_frame_into_steps(frame_features, step_features, unordered=False, k=15, threshold=0.5):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, True).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        filtered_indices = remove_outliers(top_indices[i].tolist(), threshold)
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+if __name__ == '__main__':
+    # frame_features = torch.randn(100, 768)
+    # text_features = torch.randn(8, 768)
+    # alignment = segment_video_into_steps(frame_features, text_features)
+    # breakpoint()
+    arr = [-1,-1,0,1,2,2,2,-1,-1,3,4,4,-1,-1,5,5,5,-1,6,6,7,-1,-1, 8, 8, 9]
+    start, end = get_index(arr)
+    start_1, end_1 = get_index_update(arr)
+    # start = [2, 3, 4, 8, 9, 13, 16, 18]
+    # end = [2, 3, 5, 8, 10, 15, 17, 18]
+    breakpoint()
diff --git a/anet_clip/backup/test.py b/anet_clip/backup/test.py
new file mode 100644
index
0000000000000000000000000000000000000000..e1dcf9d7be821a3db142566cb23914ea96f1c064 --- /dev/null +++ b/anet_clip/backup/test.py @@ -0,0 +1,64 @@ +# from pdvc.video_segmentation import align_frame_into_steps_op +# import torch + +# # create two tensors +# frame = torch.rand(200, 768) +# steps = torch.rand(10, 768) + +# bboxs = align_frame_into_steps_op(frame, steps, order=False) +# # breakpoint() +# print('done!') + + +# ================================================================== +# import json + +# filepath = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1/info.json" +# with open(filepath, 'r') as f: +# data = json.load(f) + +# val_history = data['history']['val_result_history'] + +# metric_sum = {} +# metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +# for k, v in val_history.items(): +# metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) +# print(f"{k}: {metric_sum[k]}") + +# best_epoch = max(metric_sum, key=metric_sum.get) +# print(val_history[best_epoch]['eval_score']) +# # write the val_history to a file +# with open('val.log', 'w') as f: +# for k, v in val_history[best_epoch]['eval_score'].items(): +# f.write(f"{k}: {v}\n") +# # print(metric_sum) +# # breakpoint() +# print('done!') + +# ================================================================== +import os +import json +import sys +sys.path.append('/mnt/data/Gvlab/wuhao/code/dibs') +from misc.utils import create_logger +save_folder = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1" + +val_logger = create_logger(save_folder, 'val.log') +infos_path = os.path.join(save_folder, 'info.json') + +with open(infos_path, 'r') as f: + data = json.load(f) +val_history = data['history']['val_result_history'] + +metric_sum = {} +metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + +best_epoch = max(metric_sum, key=metric_sum.get) +best_val_score = val_history[best_epoch]['eval_score'] +val_logger.info(f"Best epoch: {best_epoch}") +print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) +val_logger.info('\nBest Model Performance:\n' + print_info) +val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) \ No newline at end of file diff --git a/anet_clip/backup/train.py b/anet_clip/backup/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8777c91ee32ec28365e2c7579d3d84fab8571135 --- /dev/null +++ b/anet_clip/backup/train.py @@ -0,0 +1,580 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import 
SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mix' in opt.train_caption_file[0]: + elements.append('howto_mix') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mix' in opt.train_caption_file: + elements.append('howto_mix') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + 
if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # try to load saved pbox + saved_path = construct_save_path(opt) + if os.path.exists(saved_path): + try: + with open(saved_path, 'r') as f: + model.pseudo_boxes = json.load(f) + except: + # delete the bad file + os.remove(saved_path) + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + 
model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling 
rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # if dt['video_key'][0] != 'LGArj9Do0xc': + # continue + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + # if trained_samples < 1714: + # trained_samples += 1 + # continue + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + try: + output, loss = model(dt, criterion, contrastive_criterion) + except Exception as e: + print(e) + print(dt['video_key']) + continue + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': 
model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + # breakpoint() + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + + if epoch == 1 and model.pseudo_boxes is not None and 'hyper' not in opt.train_caption_file[0]: + # save the pseudo boxes + pbox_save_path = 
construct_save_path(opt) + if not os.path.exists(pbox_save_path): + with open(pbox_save_path, 'w') as f: + json.dump(model.pseudo_boxes, f) + + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # save the pesudo box + + + + # # ===============================old code============================================== + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + # =================================new code========================================================= + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] + for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) + + break + + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + # breakpoint() + + if opt.gpu_id: + 
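+ # note: CUDA_VISIBLE_DEVICES only takes effect if it is set before the first CUDA context is created; that holds here because train(opt) is only called below, but any torch.cuda call made earlier in the process would ignore this assignment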
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_fewshot.py b/anet_clip/backup/train_fewshot.py new file mode 100644 index 0000000000000000000000000000000000000000..db60bfe68fc32d3da5df89f5af1201a7151a3e8a --- /dev/null +++ b/anet_clip/backup/train_fewshot.py @@ -0,0 +1,482 @@ +# use ft_gt_percent to control the percentage of gt proposals used for finetuning + +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if 
opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_dataset = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if 
opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + 
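+ # worked example of the schedule (hypothetical values, for illustration only): with scheduled_sampling_start=0, scheduled_sampling_increase_every=5, basic_ss_prob=0.0, scheduled_sampling_increase_prob=0.05 and scheduled_sampling_max_prob=0.25, epoch 12 gives frac = (12 - 0) // 5 = 2, hence ss_prob = min(0.0 + 0.05 * 2, 0.25) = 0.10
+ # i.e. ss_prob, the probability that the caption head is fed its own previous prediction instead of the ground-truth token, ramps up in steps and saturates at scheduled_sampling_max_prob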
print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # if dt['video_key'][0] != 'LGArj9Do0xc': + # continue + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + # if trained_samples < 1714: + # trained_samples += 1 + # continue + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + 
len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = 
evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_ft.py b/anet_clip/backup/train_ft.py new file mode 100644 index 0000000000000000000000000000000000000000..bdcc497f763607f28dfb1e0a687705c42e448a09 --- /dev/null +++ b/anet_clip/backup/train_ft.py @@ -0,0 +1,513 @@ +# coding:utf-8 + +''' +train_seq2.py is different from train_seq.py in the following aspects: + +1. train_seq2.py uses the same dataset for pretraining and target task +2. the pretrain dataset and target dataset is not trained one after another in a single epoch. train pretrain dataset for 10 epochs then train target dataset for 20 epochs +3. the vocabulary is always the same for pretrain and target task i.e. combined vocabulary of pretrain and target task +4. checkpoint is located in save howto_yc2_* or howto_tasty_* +5. 
cfg use howto-tasty_tasty_* or howto-yc2_yc2_* +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder): + print('the checkpoint folder does not exist') + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = 
f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history 
= saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + 
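+ # worked example of the milestone computation (hypothetical values): learning_rate_decay_start=8, learning_rate_decay_every=3 and epoch=20 give range(int((20 - 8) / 3)) = range(4), i.e. milestones [8, 11, 14, 17], at which MultiStepLR multiplies the lr by learning_rate_decay_rate
+ # caution: optimizer.load_state_dict above also restores the saved param_groups (including their lr), so the checkpoint's lr overwrites the opt.lr * 0.5 set earlier; if the halved fine-tuning lr is intended, reset optimizer.param_groups[0]['lr'] after loading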
print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + 
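+ # note: losses_log_every = int(len(train_loader_target) / 10) is 0 whenever the loader yields fewer than 10 batches, and the modulo above then raises ZeroDivisionError; max(1, int(len(train_loader_target) / 10)) avoids this edge case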
loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and 
conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq2-ft' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macOS + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_ft2_gt.py b/anet_clip/backup/train_ft2_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..b007713a2ebbdae00dd0edaef54c41a3260279dd --- /dev/null +++ b/anet_clip/backup/train_ft2_gt.py @@ -0,0 +1,587 @@ +# coding:utf-8 + +''' +Similar to train_ft_gt.py: it fine-tunes the model on the target dataset with ground-truth annotations, but the pretraining data includes both the pretraining and the target data (captions only). + +Setting pretrain_data_mode to 'single' makes it behave the same as train_ft_gt.py.
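+(English translation of the note below: use all of the howto subset data for pretraining, then fine-tune with a portion of the ground-truth data.)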
+ +使用全部的howto subset数据进行pretrain, 然后用部分的gt数据进行fine-tune +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + # breakpoint() + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + 
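+ # example (hypothetical folder name): with pretrain_data_mode == 'single', a save folder '.../howto-yc2_yc2_clip_seq2-ft' maps to the pretraining run '.../howto_yc2_clip': replace('howto-yc2_yc2', 'howto_yc2') renames the prefix and re.sub(r"_seq2-ft.*", "", ...) strips the fine-tune suffix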
checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'vlep-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('vlep-yc2_yc2', 'vlep_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + # breakpoint() + + if opt.id_ori != '': + checkpoint_folder = checkpoint_folder + '_' + opt.id_ori + # breakpoint() + # if opt.id == "": + # pass + # else: + # checkpoint_folder = checkpoint_folder + '_' + opt.id + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_es20'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target 
= DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain 
== 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # 
captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # 
tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} 
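+            # Select the best epoch post hoc from info.json by summing the five
+            # validation metrics below. Note (an observation, not from the original
+            # comments): because the logging code above appends each metric's mean
+            # to its per-tIoU list, METEOR/CIDEr/soda_c entries may be lists while
+            # Precision/Recall are scalars, so each entry is reduced to a scalar
+            # before summing.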
+            metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall']
+            for k, v in val_history.items():
+                metric_sum[k] = sum(v['eval_score'][metric][-1] if isinstance(v['eval_score'][metric], list)
+                                    else v['eval_score'][metric] for metric in metrics)
+                # print(f"{k}: {metric_sum[k]}")
+
+            best_epoch = max(metric_sum, key=metric_sum.get)
+            best_val_score = val_history[best_epoch]['eval_score']
+            val_logger.info(f"Best epoch: {best_epoch}")
+            print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()])
+            val_logger.info('\nBest Model Performance:\n' + print_info)
+            val_logger.info('\nBest Overall Score epoch {}: {}\n'.format(best_epoch, metric_sum[best_epoch]))
+
+            break
+
+    return saved_info
+
+
+if __name__ == '__main__':
+    opt = opts.parse_opts()
+    opt.id_ori = opt.id
+
+    opt.id = 'seq2-ft({})-gt_percent-{}'.format(pretrain_data_mode, opt.ft_gt_percent)
+    if opt.id_ori != '':
+        opt.id = opt.id + '_' + opt.id_ori
+    assert 0.0 <= opt.ft_gt_percent <= 1.0
+
+    if not hasattr(opt, 'visual_feature_folder_val'):
+        opt.visual_feature_folder_val = opt.visual_feature_folder
+        opt.text_feature_folder_val = opt.text_feature_folder
+
+    if opt.map:
+        opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder]
+        opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder]
+        opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macOS
+    # breakpoint()
+    train(opt)
+
diff --git a/anet_clip/backup/train_ft_gt.py b/anet_clip/backup/train_ft_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..b481c6eb9a19299b401fbe8ce82d10716a846a7c
--- /dev/null
+++ b/anet_clip/backup/train_ft_gt.py
@@ -0,0 +1,516 @@
+# coding:utf-8
+
+'''
+train_seq2.py differs from train_seq.py in the following aspects:
+
+1. train_seq2.py uses the same dataset for the pretraining and the target task
+2. the pretrain dataset and the target dataset are not interleaved within a single epoch; the pretrain dataset is trained for 10 epochs, then the target dataset for 20 epochs
+3. the vocabulary is always the same for the pretrain and target tasks, i.e. the combined vocabulary of both
+4. checkpoints are saved under howto_yc2_* or howto_tasty_*
+5. cfgs use howto-tasty_tasty_* or howto-yc2_yc2_*
+'''
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+import re
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def _init_fn(worker_id):
+    np.random.seed(12 + worker_id)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_floder(opt)
+    opt.epoch = 20
+    opt.use_pseudo_box = False
+
+    # breakpoint()
+    if 'howto-tasty_tasty' in save_folder:
+        checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty'))  # .replace('_seq2-ft', '')
+    elif 'howto-yc2_yc2' in save_folder:
+        checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2'))  # .replace('_seq2-ft', '')
+    else:
+        print('this script only supports settings named howto-XXX_XXX')
+        exit(1)
+
+    if not os.path.exists(checkpoint_folder):
+        print('the checkpoint folder {} does not exist'.format(checkpoint_folder))
+        exit(1)
+    else:
+        if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')):
+            # print('the checkpoint folder has no val.log, denoting the setting is not fully trained')
+            for i in range(1, 100):
+                if os.path.exists(f'{checkpoint_folder}_{i}'):
+                    if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')):
+                        checkpoint_folder = f'{checkpoint_folder}_{i}'
+                        break
+                    else:
+                        continue
+                else:
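+                    # Fallback checkpoint lookup: when a run is restarted, the save
+                    # folder presumably gets a numeric suffix (_1, _2, ...), and only
+                    # a folder containing val.log corresponds to a fully trained run,
+                    # so the loop above probes the suffixed variants in order and
+                    # stops at the first fully trained one.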
+                    print(f'{checkpoint_folder}_{i} does not exist')
+                    print('the checkpoint folder does not exist')
+                    exit(1)
+
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # # continue training
+    # if opt.start_from:
+    #     opt.pretrain = False
+    #     infos_path = os.path.join(save_folder, 'info.json')
+    #     with open(infos_path) as f:
+    #         logger.info('Load info from {}'.format(infos_path))
+    #         saved_info = json.load(f)
+    #         prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+    #         exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+    #         for opt_name in prev_opt.keys():
+    #             if opt_name not in exclude_opt:
+    #                 vars(opt).update({opt_name: prev_opt.get(opt_name)})
+    #             if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+    #                 logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+    #                                                                vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0],
+        #                                         [opt.visual_feature_folder[0]],
+        #                                         [opt.text_feature_folder[0]],
+        #                                         opt.dict_file, True, 'gt',
+        #                                         opt)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file[1],
+                                              [opt.visual_feature_folder[1]],
+                                              [opt.text_feature_folder[1]],
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
+        # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size,
+        #                                    shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+        # train_dataloaders = [train_loader_pretrain, train_loader_target]
+        # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        # train_dataset.translator = train_dataset_1.translator
+
+    else:
+        print('this script only supports two datasets: one for pretraining and one for the target task')
+        exit(1)
+
+    # The rebuild below on the full target data is disabled: it would
+    # unconditionally overwrite the ft_gt_percent subset loader created above.
+    # train_dataset_target = PropSeqDataset(opt.train_caption_file,
+    #                                       opt.visual_feature_folder,
+    #                                       opt.text_feature_folder,
+    #                                       opt.dict_file, True, 'gt',
+    #                                       opt)
+    # train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+    #                                  shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+    train_dataloaders = [train_loader_target]
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0)
+    best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5)
+    val_result_history = saved_info['history'].get('val_result_history', {})
+    loss_history = saved_info['history'].get('loss_history', {})
lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + 
print_opt(opt, model, logger)
+    print_alert_message('Start training !', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # breakpoint()
+
+    # Epoch-level iteration
+    # opt.use_pseudo_box = False
+
+    while True:
+        if True:  # placeholder scope kept in place of the removed per-dataloader loop (see the commented-out loop below)
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
+
+        # breakpoint()
+        # Batch-level iteration
+        # for train_loader in train_dataloaders:
+        trained_samples = 0
+        for dt in tqdm(train_loader_target, disable=opt.disable_tqdm):
+            # # for fast debugging
+            # if trained_samples > 5:
+            #     break
+            # else:
+            #     trained_samples += 1
+            if opt.device == 'cuda':
+                torch.cuda.synchronize(opt.device)
+            if opt.debug:
+                # each epoch contains fewer mini-batches for debugging
+                if (iteration + 1) % 5 == 0:
+                    iteration += 1
+                    break
+            iteration += 1
+
+            optimizer.zero_grad()
+            dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+            dt['video_target'] = [
+                {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in
+                dt['video_target']]
+
+            # Add text encoder
+            # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+            #     captions = list()
+            #     for video_sents in dt['cap_raw']:  # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+            #         captions.extend(video_sents)
+            #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+            #     text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()}
+            #     # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])}
+            #     # len(text_encoder_input['input_ids']) = n * max_text_input_len
+            #     dt['text_encoder_input'] = text_encoder_input
+
+            # dt = collections.defaultdict(lambda: None, dt)  # Commented to
+
+            output, loss = model(dt, criterion, contrastive_criterion)
+            final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
+            # breakpoint()
+            final_loss.backward()
+            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
+
+            optimizer.step()
+
+            for loss_k, loss_v in loss.items():
+                loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item()
+            loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item()
+
+            if opt.device == 'cuda':
+                torch.cuda.synchronize()
+
+            losses_log_every = max(int(len(train_loader_target) / 10), 1)  # floor at 1 to avoid a modulo-by-zero on loaders with fewer than 10 batches
+
+            if opt.debug:
+                losses_log_every = 6
+
+            if iteration % losses_log_every == 0:
+                end = time.time()
+                for k in loss_sum.keys():
+                    loss_sum[k] = np.round(loss_sum[k] / losses_log_every, 3).item()
+
+                logger.info(
+                    "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}"
+                    .format(opt.id, iteration, epoch, loss_sum,
+                            (end - start) / losses_log_every, bad_video_num))
+
+                tf_writer.add_scalar('lr', opt.current_lr, iteration)
+                for loss_type in loss_sum.keys():
+                    tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration)
+                loss_history[iteration] = loss_sum
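+                # Each loss_history entry holds the window-averaged losses for one
+                # logging window of losses_log_every iterations; the running sums
+                # are reset just below, so consecutive windows do not overlap.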
lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch 
>= opt.epoch:
+            # load Best model and conduct evaluation
+            print('====== Conduct the Final Evaluation to test Best Checkpoint ======')
+            val_logger = create_logger(save_folder, 'val.log')
+            loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda')
+            model.load_state_dict(loaded_pth['model'], strict=True)
+            model.eval()
+            result_json_path = saved_info['best']['result_json_path']
+            eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            if opt.caption_decoder_type == 'none':
+                current_score = 2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])  # harmonic mean (F1) of precision and recall
+            else:
+                if opt.criteria_for_best_ckpt == 'dvc':
+                    current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+                else:
+                    current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter']))
+            val_logger.info('\nBest Model Performance:\n' + print_info)
+            val_logger.info('\nBest Overall Score (iter {}): {}\n'.format(iteration, current_score))
+
+            tf_writer.close()
+            break
+
+    return saved_info
+
+
+if __name__ == '__main__':
+    opt = opts.parse_opts()
+    opt.id = 'seq2-ft-gt_percent-{}'.format(opt.ft_gt_percent)
+    assert 0.0 <= opt.ft_gt_percent <= 1.0
+
+    if not hasattr(opt, 'visual_feature_folder_val'):
+        opt.visual_feature_folder_val = opt.visual_feature_folder
+        opt.text_feature_folder_val = opt.text_feature_folder
+
+    if opt.map:
+        opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder]
+        opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder]
+        opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macOS
+    # breakpoint()
+    train(opt)
+
diff --git a/anet_clip/backup/train_pre_ft_gt.py b/anet_clip/backup/train_pre_ft_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e6c204b58c0fed4cca87004c6816d7830cee1cc
--- /dev/null
+++ b/anet_clip/backup/train_pre_ft_gt.py
@@ -0,0 +1,537 @@
+# coding:utf-8
+
+'''
+Similar to train_ft_gt.py: it fine-tunes the model on the target dataset with
+ground-truth annotations, but the pretraining data includes both the pretrain
+and the target data (captions only).
+
+With pretrain_data_mode set to 'single', it behaves the same as train_ft_gt.py.
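+
+pretrain_data_mode is a module-level constant ('mix', 'seq' or 'single'; see the
+assignment below). Judging from the checkpoint lookup in train(), it only selects
+which pretrained checkpoint folder is loaded: 'mix' keeps the howto-XXX_XXX save
+name, while 'single' maps it back to the plain howto_XXX pretraining folder.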
+ + +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'single' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = 
re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_test'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = 
PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + # breakpoint() + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = 
list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # 
len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = 
{'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + opt.id = 'seq2-pre-{}-ft({})-gt'.format(opt.pre_percent, pretrain_data_mode) + assert opt.pre_percent <= 1.0 and opt.pre_percent >= 0.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in 
opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macOS
+    # breakpoint()
+    train(opt)
+
diff --git a/anet_clip/backup/train_pre_perc.py b/anet_clip/backup/train_pre_perc.py
new file mode 100644
index 0000000000000000000000000000000000000000..909dcdece82848854abf5f774b1d5f848f0a49eb
--- /dev/null
+++ b/anet_clip/backup/train_pre_perc.py
@@ -0,0 +1,484 @@
+# coding:utf-8
+'''
+The cfgs are the same as for train.py, but one extra argument is needed: pre_percent
+Recommended values: 0.1, 0.2, 0.4, 0.6, 0.8, 1
+'''
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # set as an environment variable so it actually takes effect (a bare `CUDA_LAUNCH_BLOCKING = 1` assignment is a no-op)
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+import random
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def seed_worker(worker_id):
+    worker_seed = torch.initial_seed() % 2**32
+    np.random.seed(worker_seed)
+    random.seed(worker_seed)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_folder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = 
{'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_subdata = PercentageSubsetDataset(train_dataset_1, opt.pre_percent) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_subdata, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_all = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_dataset = PercentageSubsetDataset(train_dataset_all, opt.pre_percent) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from 
{}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Start training!', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob =
min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 
'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] 
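+ # Final evaluation reuses the checkpoint-selection score of the per-epoch validation: + # with no caption decoder it is the F1 (harmonic mean) of localization Precision and Recall, 2/(1/P + 1/R); + # with criteria 'dvc' it is mean(METEOR) + mean(soda_c); + # otherwise it is the paragraph-level mean(para_METEOR) + mean(para_CIDEr) + mean(para_Bleu_4).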
+ eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-pre_perc-{}'.format(opt.pre_percent) + assert opt.pre_percent <= 1.0 and opt.pre_percent >= 0.0 + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_seq.py b/anet_clip/backup/train_seq.py new file mode 100644 index 0000000000000000000000000000000000000000..6a415e180bf2506f1cbef5ce6d0f6f4205e76203 --- /dev/null +++ b/anet_clip/backup/train_seq.py @@ -0,0 +1,457 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_floder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup environment completed!') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target]
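+ # Sequential training scheme: the loaders in train_dataloaders are iterated in order within every epoch, + # so each epoch first passes over the pretraining data (folder 0) and then over the target data (folder 1); + # with a single feature folder, only the target loader runs.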
+ + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / 
opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Start training!', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 25: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains fewer mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug:
+ losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = 
{'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-train' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_seq_gt.py b/anet_clip/backup/train_seq_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..235ae3a83169787f2b2db87e71f0fabe2dbc2dc1 --- /dev/null +++ b/anet_clip/backup/train_seq_gt.py @@ -0,0 +1,480 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path 
import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_floder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup environment completed!') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # Create the dataset with the specified percentage + subset_data =
PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + + # # Create a DataLoader for the subset dataset + # subset_dataloader = DataLoader(subset_data, batch_size=64, shuffle=True) + + train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print(f'this script only supports two-dataset training, but {len(opt.visual_feature_folder)} dataset folder(s) were provided') + exit(1) + # NOTE: the single-dataset branch below is unreachable after exit(1); kept from train_seq.py + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in
model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Start training!', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + opt.use_pseudo_box = False # True for howto, False for yc2/tasty, + opt.pseudo_box_aug = False + opt.refine_pseudo_box = False + # breakpoint() + + for train_loader in train_dataloaders: + opt.use_pseudo_box = not opt.use_pseudo_box
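+ # The two loaders alternate within each epoch: the toggle above turns pseudo boxes on for the first + # (HowTo-style pretraining) loader and off for the second (target data with GT annotations); + # the updated flag is propagated to the criterion and its matcher below.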
+ criterion.opt = opt + criterion.matcher.use_pseudo_box = opt.use_pseudo_box + + # if opt.use_pseudo_box: + # print('howto dataset') + # else: + # print('target dataset') + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 25: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = 
evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': 
+ current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-gt_percent_{}'.format(opt.ft_gt_percent) + assert opt.ft_gt_percent <= 1.0 and opt.ft_gt_percent >= 0.0 + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/info.json b/anet_clip/info.json new file mode 100644 index 0000000000000000000000000000000000000000..882983ce3374f2dc0e07ba72cc8c953647b5ce11 --- /dev/null +++ b/anet_clip/info.json @@ -0,0 +1 @@ +{"best": {"opt": {"cfg_path": "cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/anet/captiondata/train_modified.json"], "invalid_video_json": [], "val_caption_file": "data/anet/captiondata/val_1.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/anet/captiondata/val_1.json", "data/anet/captiondata/val_2.json"], "gt_file_for_para_eval": ["data/anet/captiondata/para/anet_entities_val_1_para.json", "data/anet/captiondata/para/anet_entities_val_2_para.json"], "dict_file": "data/howto/vocabulary_howto_rate2_anet.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["CLIP"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 16221, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 
1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 100, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "CLIP", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 30, "window_size": 2, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 10, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, 
"train_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "eval_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_anet.json", "vocab_size_val": 16221, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 200180, "epoch": 19, "best_val_score": 0.4938654333071738, "result_json_path": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/prediction/num4917_epoch19.json", "avg_proposal_num": -1, "Precision": 0.5612365263371945, "Recall": 0.5270524681293403}, "last": {"opt": {"cfg_path": "cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/anet/captiondata/train_modified.json"], "invalid_video_json": [], "val_caption_file": "data/anet/captiondata/val_1.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/anet/captiondata/val_1.json", "data/anet/captiondata/val_2.json"], "gt_file_for_para_eval": ["data/anet/captiondata/para/anet_entities_val_1_para.json", "data/anet/captiondata/para/anet_entities_val_2_para.json"], "dict_file": "data/howto/vocabulary_howto_rate2_anet.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["CLIP"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 16221, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 100, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "CLIP", "text_hidden_dim": 768, 
"max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 30, "window_size": 2, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 10, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "train_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "eval_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_anet.json", "vocab_size_val": 16221, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 200180, "epoch": 19, "best_val_score": 0.4938654333071738}, "history": 
{"val_result_history": {"0": {"eval_score": {"Bleu_1": 0.15656016917085527, "Bleu_2": 0.08210369852679855, "Bleu_3": 0.042491746140277446, "Bleu_4": 0.021149866989626908, "METEOR": 0.08752782819459405, "ROUGE_L": 0.1577032846084498, "CIDEr": 0.2687260839927409, "Recall": 0.4986985069085389, "Precision": 0.548450952477792, "soda_c": 0.045070258467165024, "para_Bleu_1": 0.36987086578065714, "para_Bleu_2": 0.1987998709052068, "para_Bleu_3": 0.11671522868501899, "para_Bleu_4": 0.07164097958462183, "para_METEOR": 0.13901753612789455, "para_ROUGE_L": 0.2826680559963382, "para_CIDEr": 0.0956891322121665, "avg_proposal_number": -1}}, "1": {"eval_score": {"Bleu_1": 0.15965966113561106, "Bleu_2": 0.08785069799970043, "Bleu_3": 0.04739925348589703, "Bleu_4": 0.02377096308421814, "METEOR": 0.09062964515721111, "ROUGE_L": 0.1652647774491388, "CIDEr": 0.27366191469495676, "Recall": 0.45131293652113946, "Precision": 0.5379414954918249, "soda_c": 0.04303682007432423, "para_Bleu_1": 0.3640361416830845, "para_Bleu_2": 0.1986476696673755, "para_Bleu_3": 0.11814800235116821, "para_Bleu_4": 0.07336184523852665, "para_METEOR": 0.13911724177507803, "para_ROUGE_L": 0.28211794880017504, "para_CIDEr": 0.08634617454158834}}, "2": {"eval_score": {"Bleu_1": 0.15440507165989542, "Bleu_2": 0.08178273697953425, "Bleu_3": 0.042600749568780155, "Bleu_4": 0.02119123483046711, "METEOR": 0.08563216148714695, "ROUGE_L": 0.156809182143994, "CIDEr": 0.25960752079137744, "Recall": 0.5075951227720545, "Precision": 0.571834112941489, "soda_c": 0.048597974030683, "para_Bleu_1": 0.3985431504573892, "para_Bleu_2": 0.22415947108296613, "para_Bleu_3": 0.1341003834690626, "para_Bleu_4": 0.08312155143550452, "para_METEOR": 0.1510085678983445, "para_ROUGE_L": 0.2957598062989384, "para_CIDEr": 0.12271570278513648, "avg_proposal_number": -1}}, "3": {"eval_score": {"Bleu_1": 0.16003947012491918, "Bleu_2": 0.08640386650819816, "Bleu_3": 0.045769192920880976, "Bleu_4": 0.023139762266241797, "METEOR": 0.08893476927946467, "ROUGE_L": 0.16285119298911696, "CIDEr": 0.27850058398714506, "Recall": 0.4974410652224822, "Precision": 0.571762083926507, "soda_c": 0.04898353247531122, "para_Bleu_1": 0.4116267700746525, "para_Bleu_2": 0.23315066082372427, "para_Bleu_3": 0.139785630195007, "para_Bleu_4": 0.08689414164874545, "para_METEOR": 0.15321412716959742, "para_ROUGE_L": 0.2993749803089721, "para_CIDEr": 0.12755194391496638, "avg_proposal_number": -1}}, "4": {"eval_score": {"Bleu_1": 0.1612752203314224, "Bleu_2": 0.08712092952271142, "Bleu_3": 0.04643407984417907, "Bleu_4": 0.024237450149938583, "METEOR": 0.0888552980469009, "ROUGE_L": 0.16165678007821221, "CIDEr": 0.28844655875134945, "Recall": 0.5079771255793173, "Precision": 0.5707494407158785, "soda_c": 0.05143467092505771, "para_Bleu_1": 0.425828341023263, "para_Bleu_2": 0.2431293051387748, "para_Bleu_3": 0.14662751878582, "para_Bleu_4": 0.09131956416083617, "para_METEOR": 0.15868276543147294, "para_ROUGE_L": 0.30762031965083425, "para_CIDEr": 0.1438790695271004, "avg_proposal_number": -1}}, "5": {"eval_score": {"Bleu_1": 0.16203040821313286, "Bleu_2": 0.087418866671477, "Bleu_3": 0.04641401855891123, "Bleu_4": 0.023872355329811287, "METEOR": 0.08736154709181514, "ROUGE_L": 0.16095171754962678, "CIDEr": 0.3019460931650574, "Recall": 0.5237442505746305, "Precision": 0.5691986983933232, "soda_c": 0.05366939846142926, "para_Bleu_1": 0.4285515683378188, "para_Bleu_2": 0.24896313523930838, "para_Bleu_3": 0.15083849533584295, "para_Bleu_4": 0.09425440122753082, "para_METEOR": 0.15418242275887206, 
"para_ROUGE_L": 0.3037081433191389, "para_CIDEr": 0.16822639157343386, "avg_proposal_number": -1}}, "6": {"eval_score": {"Bleu_1": 0.17095715677415013, "Bleu_2": 0.0951967897773989, "Bleu_3": 0.05145074727592996, "Bleu_4": 0.026686223548170303, "METEOR": 0.09033289555302068, "ROUGE_L": 0.16939818741017104, "CIDEr": 0.33299543538258497, "Recall": 0.5001550726802355, "Precision": 0.5629321740898863, "soda_c": 0.05378783144134501, "para_Bleu_1": 0.44719474980697405, "para_Bleu_2": 0.2615784516531111, "para_Bleu_3": 0.15956746990786394, "para_Bleu_4": 0.09983770060804388, "para_METEOR": 0.15549284849496958, "para_ROUGE_L": 0.30852597622578265, "para_CIDEr": 0.18758102150887232, "avg_proposal_number": -1}}, "7": {"eval_score": {"Bleu_1": 0.16525493799366836, "Bleu_2": 0.09017429361474327, "Bleu_3": 0.04843073565357156, "Bleu_4": 0.025752141227780294, "METEOR": 0.09042668571725655, "ROUGE_L": 0.1657835735936403, "CIDEr": 0.30766696683798356, "Recall": 0.5070758476264831, "Precision": 0.5698723815334497, "soda_c": 0.05193286444599829, "para_Bleu_1": 0.4299765573510605, "para_Bleu_2": 0.24998607326423264, "para_Bleu_3": 0.15168978606887273, "para_Bleu_4": 0.09540463753102806, "para_METEOR": 0.15913054274631774, "para_ROUGE_L": 0.30821511076520103, "para_CIDEr": 0.14655297481419807}}, "8": {"eval_score": {"Bleu_1": 0.1659435247550983, "Bleu_2": 0.09010888064116455, "Bleu_3": 0.04740925434645997, "Bleu_4": 0.023810200153797586, "METEOR": 0.0893691583245007, "ROUGE_L": 0.16481267120708817, "CIDEr": 0.3096929324572276, "Recall": 0.5271698247293078, "Precision": 0.5766981899532185, "soda_c": 0.05637593299631936, "para_Bleu_1": 0.4507795558374508, "para_Bleu_2": 0.2668765313566654, "para_Bleu_3": 0.16324000259413463, "para_Bleu_4": 0.10292908422008885, "para_METEOR": 0.163503434468027, "para_ROUGE_L": 0.3141109355407807, "para_CIDEr": 0.1830754815850521, "avg_proposal_number": -1}}, "9": {"eval_score": {"Bleu_1": 0.16664911544364056, "Bleu_2": 0.09023295213839283, "Bleu_3": 0.04763940550902772, "Bleu_4": 0.02409205514859969, "METEOR": 0.0878588871148787, "ROUGE_L": 0.16401896184386325, "CIDEr": 0.31947446694949533, "Recall": 0.5282742157284517, "Precision": 0.5750796556165633, "soda_c": 0.05745241491068406, "para_Bleu_1": 0.46204429574393835, "para_Bleu_2": 0.2749900961045832, "para_Bleu_3": 0.1683879565471281, "para_Bleu_4": 0.10624339593597942, "para_METEOR": 0.16245439213508253, "para_ROUGE_L": 0.3162965936511474, "para_CIDEr": 0.20803178964320856, "avg_proposal_number": -1}}, "10": {"eval_score": {"Bleu_1": 0.1671778590456048, "Bleu_2": 0.09077014613023152, "Bleu_3": 0.0476684747303012, "Bleu_4": 0.02445564298599047, "METEOR": 0.08933235383587503, "ROUGE_L": 0.1654660162888944, "CIDEr": 0.31886265111118334, "Recall": 0.5314017615268335, "Precision": 0.5831469052945512, "soda_c": 0.05853263249839839, "para_Bleu_1": 0.46544090189732323, "para_Bleu_2": 0.2789325258737778, "para_Bleu_3": 0.17172911957785325, "para_Bleu_4": 0.10903514181091935, "para_METEOR": 0.16550159188298816, "para_ROUGE_L": 0.3181118223429575, "para_CIDEr": 0.2056618808195008, "avg_proposal_number": -1}}, "11": {"eval_score": {"Bleu_1": 0.16560019346009094, "Bleu_2": 0.08934946581658681, "Bleu_3": 0.04692472826903507, "Bleu_4": 0.023331060597699706, "METEOR": 0.08861943572471001, "ROUGE_L": 0.16392659155605854, "CIDEr": 0.31177527957257306, "Recall": 0.5248955646301546, "Precision": 0.5713061826316813, "soda_c": 0.056694173808073595, "para_Bleu_1": 0.45551540477127933, "para_Bleu_2": 0.2725270289009415, "para_Bleu_3": 
0.16731081427102573, "para_Bleu_4": 0.10555679460767188, "para_METEOR": 0.1665724805603667, "para_ROUGE_L": 0.31619749898051375, "para_CIDEr": 0.19719071969736374}}, "12": {"eval_score": {"Bleu_1": 0.16778675341331784, "Bleu_2": 0.09082555766488616, "Bleu_3": 0.047445681271689716, "Bleu_4": 0.02375280793420285, "METEOR": 0.08883520478698428, "ROUGE_L": 0.16531435721130755, "CIDEr": 0.31778343902267087, "Recall": 0.5273619026669621, "Precision": 0.5698181479221706, "soda_c": 0.05753856798988932, "para_Bleu_1": 0.4610381779339771, "para_Bleu_2": 0.2761144617772928, "para_Bleu_3": 0.16915034097081671, "para_Bleu_4": 0.10654029953240575, "para_METEOR": 0.16638305166981465, "para_ROUGE_L": 0.31710573495570465, "para_CIDEr": 0.19601570682645908}}, "13": {"eval_score": {"Bleu_1": 0.16683698969676453, "Bleu_2": 0.09036855967772307, "Bleu_3": 0.047484441130632896, "Bleu_4": 0.023876859658376735, "METEOR": 0.08814626862844692, "ROUGE_L": 0.16473003568483396, "CIDEr": 0.3189568758512915, "Recall": 0.5281546209817979, "Precision": 0.5704333604501349, "soda_c": 0.057417105431783064, "para_Bleu_1": 0.4580706340663244, "para_Bleu_2": 0.27372623489326064, "para_Bleu_3": 0.16745128920972313, "para_Bleu_4": 0.10550306643408856, "para_METEOR": 0.16656454278617736, "para_ROUGE_L": 0.31631873012989425, "para_CIDEr": 0.19724321819057877}}, "14": {"eval_score": {"Bleu_1": 0.16662144072598145, "Bleu_2": 0.08988753231411394, "Bleu_3": 0.04690847145308288, "Bleu_4": 0.023224274927987735, "METEOR": 0.08725158341768323, "ROUGE_L": 0.16364893754496343, "CIDEr": 0.32028824475030926, "Recall": 0.5260420675803493, "Precision": 0.5630584367161506, "soda_c": 0.057565785652999135, "para_Bleu_1": 0.46764194087144684, "para_Bleu_2": 0.2801629240374498, "para_Bleu_3": 0.1713033186995987, "para_Bleu_4": 0.10750827268624512, "para_METEOR": 0.16742715934059368, "para_ROUGE_L": 0.31858424377772926, "para_CIDEr": 0.2089956210595351, "avg_proposal_number": -1}}, "15": {"eval_score": {"Bleu_1": 0.16754398447821903, "Bleu_2": 0.08978801866243748, "Bleu_3": 0.046077601805781236, "Bleu_4": 0.02215727819941335, "METEOR": 0.08650894641812401, "ROUGE_L": 0.16425299709373153, "CIDEr": 0.3192637628790779, "Recall": 0.5308598805776927, "Precision": 0.5705477594739302, "soda_c": 0.059035206979637336, "para_Bleu_1": 0.4722129873397206, "para_Bleu_2": 0.2843271953295457, "para_Bleu_3": 0.17433620623201318, "para_Bleu_4": 0.10943737200004257, "para_METEOR": 0.16524483023272712, "para_ROUGE_L": 0.3180351825656492, "para_CIDEr": 0.2139382514781602, "avg_proposal_number": -1}}, "16": {"eval_score": {"Bleu_1": 0.16584280243722227, "Bleu_2": 0.08889969905794425, "Bleu_3": 0.04569298286173284, "Bleu_4": 0.021992960199339176, "METEOR": 0.08570833880397384, "ROUGE_L": 0.16234979503724006, "CIDEr": 0.3170462149966731, "Recall": 0.5273397281824633, "Precision": 0.5648989898989865, "soda_c": 0.058539462474976364, "para_Bleu_1": 0.4735378044184376, "para_Bleu_2": 0.2855599966961999, "para_Bleu_3": 0.17485842077678387, "para_Bleu_4": 0.10998333079246524, "para_METEOR": 0.16580782598840993, "para_ROUGE_L": 0.3184105968751349, "para_CIDEr": 0.2144083270960459, "avg_proposal_number": -1}}, "17": {"eval_score": {"Bleu_1": 0.16720622564646215, "Bleu_2": 0.08946643461131876, "Bleu_3": 0.04568137095423273, "Bleu_4": 0.022039722503534608, "METEOR": 0.08588931176535387, "ROUGE_L": 0.16315869782389542, "CIDEr": 0.32099741016990446, "Recall": 0.5265047853249455, "Precision": 0.5647345942647923, "soda_c": 0.05847424883094643, "para_Bleu_1": 0.47508155945278135, 
"para_Bleu_2": 0.2858233856765029, "para_Bleu_3": 0.17499503512152859, "para_Bleu_4": 0.11002968407978216, "para_METEOR": 0.16541373751181562, "para_ROUGE_L": 0.3190110890037882, "para_CIDEr": 0.21421557986951392}}, "18": {"eval_score": {"Bleu_1": 0.1662475028889873, "Bleu_2": 0.08895418147726737, "Bleu_3": 0.04559170272578064, "Bleu_4": 0.021869443641790748, "METEOR": 0.0853620749347768, "ROUGE_L": 0.16226693807975517, "CIDEr": 0.3203697867996399, "Recall": 0.5243080966273422, "Precision": 0.5592002237136435, "soda_c": 0.058066485957305666, "para_Bleu_1": 0.47302383939773723, "para_Bleu_2": 0.2848420020452884, "para_Bleu_3": 0.17477626094199183, "para_Bleu_4": 0.11005159892431456, "para_METEOR": 0.16474042555391544, "para_ROUGE_L": 0.31754161420686944, "para_CIDEr": 0.2082818020277855}}, "19": {"eval_score": {"Bleu_1": 0.16600244771432068, "Bleu_2": 0.08859363359362551, "Bleu_3": 0.045174799285766926, "Bleu_4": 0.021453706973694267, "METEOR": 0.08469975853590762, "ROUGE_L": 0.1615333099598977, "CIDEr": 0.3178372173219055, "Recall": 0.5270524681293403, "Precision": 0.5612365263371945, "soda_c": 0.05852570981425518, "para_Bleu_1": 0.47641872729084495, "para_Bleu_2": 0.28679556025023933, "para_Bleu_3": 0.1757988669447671, "para_Bleu_4": 0.11061748158923715, "para_METEOR": 0.1647238014039032, "para_ROUGE_L": 0.3182336912910021, "para_CIDEr": 0.21852415031403352, "avg_proposal_number": -1}}}, "loss_history": {"1000": {"loss_ce": 0.284, "loss_counter": 0.126, "loss_bbox": 0.117, "loss_giou": 0.275, "loss_self_iou": 0.126, "cardinality_error": 3.775, "loss_ce_0": 0.284, "loss_counter_0": 0.126, "loss_bbox_0": 0.118, "loss_giou_0": 0.276, "loss_self_iou_0": 0.126, "cardinality_error_0": 3.775, "loss_caption_0": 3.781, "loss_caption": 3.778, "total_loss": 18.585}, "2000": {"loss_ce": 0.287, "loss_counter": 0.119, "loss_bbox": 0.087, "loss_giou": 0.239, "loss_self_iou": 0.12, "cardinality_error": 3.705, "loss_ce_0": 0.289, "loss_counter_0": 0.118, "loss_bbox_0": 0.087, "loss_giou_0": 0.239, "loss_self_iou_0": 0.121, "cardinality_error_0": 3.705, "loss_caption_0": 3.682, "loss_caption": 3.675, "total_loss": 17.896}, "3000": {"loss_ce": 0.291, "loss_counter": 0.122, "loss_bbox": 0.078, "loss_giou": 0.227, "loss_self_iou": 0.098, "cardinality_error": 3.705, "loss_ce_0": 0.292, "loss_counter_0": 0.122, "loss_bbox_0": 0.078, "loss_giou_0": 0.228, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.705, "loss_caption_0": 3.668, "loss_caption": 3.664, "total_loss": 17.771}, "4000": {"loss_ce": 0.289, "loss_counter": 0.126, "loss_bbox": 0.078, "loss_giou": 0.224, "loss_self_iou": 0.1, "cardinality_error": 3.784, "loss_ce_0": 0.291, "loss_counter_0": 0.127, "loss_bbox_0": 0.078, "loss_giou_0": 0.223, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.784, "loss_caption_0": 3.624, "loss_caption": 3.629, "total_loss": 17.579}, "5000": {"loss_ce": 0.285, "loss_counter": 0.121, "loss_bbox": 0.08, "loss_giou": 0.218, "loss_self_iou": 0.114, "cardinality_error": 3.674, "loss_ce_0": 0.287, "loss_counter_0": 0.121, "loss_bbox_0": 0.08, "loss_giou_0": 0.218, "loss_self_iou_0": 0.115, "cardinality_error_0": 3.674, "loss_caption_0": 3.629, "loss_caption": 3.629, "total_loss": 17.526}, "6000": {"loss_ce": 0.292, "loss_counter": 0.13, "loss_bbox": 0.076, "loss_giou": 0.22, "loss_self_iou": 0.098, "cardinality_error": 3.786, "loss_ce_0": 0.293, "loss_counter_0": 0.129, "loss_bbox_0": 0.076, "loss_giou_0": 0.22, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.786, "loss_caption_0": 3.625, "loss_caption": 3.622, 
"total_loss": 17.555}, "7000": {"loss_ce": 0.292, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.215, "loss_self_iou": 0.097, "cardinality_error": 3.746, "loss_ce_0": 0.293, "loss_counter_0": 0.119, "loss_bbox_0": 0.076, "loss_giou_0": 0.215, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.746, "loss_caption_0": 3.58, "loss_caption": 3.576, "total_loss": 17.319}, "8000": {"loss_ce": 0.288, "loss_counter": 0.129, "loss_bbox": 0.078, "loss_giou": 0.218, "loss_self_iou": 0.108, "cardinality_error": 3.754, "loss_ce_0": 0.288, "loss_counter_0": 0.128, "loss_bbox_0": 0.079, "loss_giou_0": 0.218, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.754, "loss_caption_0": 3.546, "loss_caption": 3.546, "total_loss": 17.209}, "9000": {"loss_ce": 0.29, "loss_counter": 0.12, "loss_bbox": 0.078, "loss_giou": 0.219, "loss_self_iou": 0.1, "cardinality_error": 3.685, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.078, "loss_giou_0": 0.219, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.685, "loss_caption_0": 3.544, "loss_caption": 3.54, "total_loss": 17.2}, "10000": {"loss_ce": 0.293, "loss_counter": 0.125, "loss_bbox": 0.077, "loss_giou": 0.22, "loss_self_iou": 0.101, "cardinality_error": 3.748, "loss_ce_0": 0.293, "loss_counter_0": 0.125, "loss_bbox_0": 0.078, "loss_giou_0": 0.22, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.748, "loss_caption_0": 3.582, "loss_caption": 3.577, "total_loss": 17.376}, "11000": {"loss_ce": 0.29, "loss_counter": 0.124, "loss_bbox": 0.077, "loss_giou": 0.217, "loss_self_iou": 0.101, "cardinality_error": 3.788, "loss_ce_0": 0.292, "loss_counter_0": 0.123, "loss_bbox_0": 0.076, "loss_giou_0": 0.217, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.788, "loss_caption_0": 3.446, "loss_caption": 3.443, "total_loss": 16.802}, "12000": {"loss_ce": 0.29, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.214, "loss_self_iou": 0.103, "cardinality_error": 3.694, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.075, "loss_giou_0": 0.213, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.694, "loss_caption_0": 3.427, "loss_caption": 3.428, "total_loss": 16.701}, "13000": {"loss_ce": 0.291, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.217, "loss_self_iou": 0.107, "cardinality_error": 3.689, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.076, "loss_giou_0": 0.217, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.689, "loss_caption_0": 3.464, "loss_caption": 3.461, "total_loss": 16.871}, "14000": {"loss_ce": 0.292, "loss_counter": 0.118, "loss_bbox": 0.073, "loss_giou": 0.21, "loss_self_iou": 0.1, "cardinality_error": 3.663, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.211, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.663, "loss_caption_0": 3.414, "loss_caption": 3.41, "total_loss": 16.616}, "15000": {"loss_ce": 0.295, "loss_counter": 0.127, "loss_bbox": 0.076, "loss_giou": 0.214, "loss_self_iou": 0.103, "cardinality_error": 3.828, "loss_ce_0": 0.296, "loss_counter_0": 0.127, "loss_bbox_0": 0.076, "loss_giou_0": 0.215, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.828, "loss_caption_0": 3.453, "loss_caption": 3.453, "total_loss": 16.836}, "16000": {"loss_ce": 0.296, "loss_counter": 0.121, "loss_bbox": 0.073, "loss_giou": 0.206, "loss_self_iou": 0.105, "cardinality_error": 3.687, "loss_ce_0": 0.297, "loss_counter_0": 0.12, "loss_bbox_0": 0.072, "loss_giou_0": 0.207, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.687, "loss_caption_0": 3.461, 
"loss_caption": 3.462, "total_loss": 16.803}, "17000": {"loss_ce": 0.3, "loss_counter": 0.127, "loss_bbox": 0.073, "loss_giou": 0.208, "loss_self_iou": 0.102, "cardinality_error": 3.791, "loss_ce_0": 0.3, "loss_counter_0": 0.127, "loss_bbox_0": 0.073, "loss_giou_0": 0.209, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.791, "loss_caption_0": 3.469, "loss_caption": 3.465, "total_loss": 16.864}, "18000": {"loss_ce": 0.298, "loss_counter": 0.119, "loss_bbox": 0.074, "loss_giou": 0.205, "loss_self_iou": 0.107, "cardinality_error": 3.68, "loss_ce_0": 0.298, "loss_counter_0": 0.119, "loss_bbox_0": 0.074, "loss_giou_0": 0.206, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.68, "loss_caption_0": 3.478, "loss_caption": 3.475, "total_loss": 16.859}, "19000": {"loss_ce": 0.305, "loss_counter": 0.126, "loss_bbox": 0.073, "loss_giou": 0.207, "loss_self_iou": 0.099, "cardinality_error": 3.752, "loss_ce_0": 0.304, "loss_counter_0": 0.126, "loss_bbox_0": 0.072, "loss_giou_0": 0.208, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.752, "loss_caption_0": 3.396, "loss_caption": 3.396, "total_loss": 16.585}, "20000": {"loss_ce": 0.303, "loss_counter": 0.128, "loss_bbox": 0.071, "loss_giou": 0.208, "loss_self_iou": 0.101, "cardinality_error": 3.804, "loss_ce_0": 0.304, "loss_counter_0": 0.128, "loss_bbox_0": 0.071, "loss_giou_0": 0.208, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.804, "loss_caption_0": 3.42, "loss_caption": 3.419, "total_loss": 16.684}, "21000": {"loss_ce": 0.298, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.202, "loss_self_iou": 0.101, "cardinality_error": 3.666, "loss_ce_0": 0.299, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.202, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.666, "loss_caption_0": 3.344, "loss_caption": 3.335, "total_loss": 16.294}, "22000": {"loss_ce": 0.293, "loss_counter": 0.119, "loss_bbox": 0.073, "loss_giou": 0.201, "loss_self_iou": 0.109, "cardinality_error": 3.752, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.203, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.752, "loss_caption_0": 3.302, "loss_caption": 3.304, "total_loss": 16.116}, "23000": {"loss_ce": 0.299, "loss_counter": 0.128, "loss_bbox": 0.077, "loss_giou": 0.208, "loss_self_iou": 0.113, "cardinality_error": 3.803, "loss_ce_0": 0.299, "loss_counter_0": 0.128, "loss_bbox_0": 0.076, "loss_giou_0": 0.208, "loss_self_iou_0": 0.112, "cardinality_error_0": 3.803, "loss_caption_0": 3.348, "loss_caption": 3.34, "total_loss": 16.363}, "24000": {"loss_ce": 0.293, "loss_counter": 0.122, "loss_bbox": 0.076, "loss_giou": 0.207, "loss_self_iou": 0.093, "cardinality_error": 3.729, "loss_ce_0": 0.294, "loss_counter_0": 0.122, "loss_bbox_0": 0.076, "loss_giou_0": 0.207, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.729, "loss_caption_0": 3.354, "loss_caption": 3.351, "total_loss": 16.364}, "25000": {"loss_ce": 0.294, "loss_counter": 0.122, "loss_bbox": 0.078, "loss_giou": 0.213, "loss_self_iou": 0.091, "cardinality_error": 3.734, "loss_ce_0": 0.295, "loss_counter_0": 0.122, "loss_bbox_0": 0.077, "loss_giou_0": 0.214, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.734, "loss_caption_0": 3.372, "loss_caption": 3.372, "total_loss": 16.494}, "26000": {"loss_ce": 0.298, "loss_counter": 0.125, "loss_bbox": 0.072, "loss_giou": 0.203, "loss_self_iou": 0.096, "cardinality_error": 3.784, "loss_ce_0": 0.299, "loss_counter_0": 0.125, "loss_bbox_0": 0.073, "loss_giou_0": 0.204, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.784, 
"loss_caption_0": 3.334, "loss_caption": 3.333, "total_loss": 16.279}, "27000": {"loss_ce": 0.289, "loss_counter": 0.118, "loss_bbox": 0.076, "loss_giou": 0.203, "loss_self_iou": 0.102, "cardinality_error": 3.64, "loss_ce_0": 0.291, "loss_counter_0": 0.119, "loss_bbox_0": 0.076, "loss_giou_0": 0.203, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.64, "loss_caption_0": 3.348, "loss_caption": 3.345, "total_loss": 16.287}, "28000": {"loss_ce": 0.292, "loss_counter": 0.125, "loss_bbox": 0.077, "loss_giou": 0.201, "loss_self_iou": 0.095, "cardinality_error": 3.774, "loss_ce_0": 0.293, "loss_counter_0": 0.125, "loss_bbox_0": 0.076, "loss_giou_0": 0.202, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.774, "loss_caption_0": 3.337, "loss_caption": 3.333, "total_loss": 16.249}, "29000": {"loss_ce": 0.298, "loss_counter": 0.12, "loss_bbox": 0.075, "loss_giou": 0.204, "loss_self_iou": 0.1, "cardinality_error": 3.755, "loss_ce_0": 0.299, "loss_counter_0": 0.12, "loss_bbox_0": 0.074, "loss_giou_0": 0.205, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.755, "loss_caption_0": 3.315, "loss_caption": 3.321, "total_loss": 16.223}, "30000": {"loss_ce": 0.302, "loss_counter": 0.119, "loss_bbox": 0.071, "loss_giou": 0.195, "loss_self_iou": 0.103, "cardinality_error": 3.72, "loss_ce_0": 0.302, "loss_counter_0": 0.119, "loss_bbox_0": 0.072, "loss_giou_0": 0.196, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.72, "loss_caption_0": 3.347, "loss_caption": 3.349, "total_loss": 16.283}, "31000": {"loss_ce": 0.296, "loss_counter": 0.123, "loss_bbox": 0.073, "loss_giou": 0.202, "loss_self_iou": 0.114, "cardinality_error": 3.772, "loss_ce_0": 0.296, "loss_counter_0": 0.123, "loss_bbox_0": 0.074, "loss_giou_0": 0.203, "loss_self_iou_0": 0.115, "cardinality_error_0": 3.772, "loss_caption_0": 3.24, "loss_caption": 3.242, "total_loss": 15.889}, "32000": {"loss_ce": 0.3, "loss_counter": 0.117, "loss_bbox": 0.069, "loss_giou": 0.193, "loss_self_iou": 0.093, "cardinality_error": 3.66, "loss_ce_0": 0.3, "loss_counter_0": 0.117, "loss_bbox_0": 0.07, "loss_giou_0": 0.195, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.66, "loss_caption_0": 3.251, "loss_caption": 3.248, "total_loss": 15.869}, "33000": {"loss_ce": 0.302, "loss_counter": 0.126, "loss_bbox": 0.07, "loss_giou": 0.197, "loss_self_iou": 0.102, "cardinality_error": 3.787, "loss_ce_0": 0.301, "loss_counter_0": 0.126, "loss_bbox_0": 0.071, "loss_giou_0": 0.199, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.787, "loss_caption_0": 3.223, "loss_caption": 3.225, "total_loss": 15.81}, "34000": {"loss_ce": 0.297, "loss_counter": 0.121, "loss_bbox": 0.076, "loss_giou": 0.201, "loss_self_iou": 0.107, "cardinality_error": 3.719, "loss_ce_0": 0.296, "loss_counter_0": 0.121, "loss_bbox_0": 0.077, "loss_giou_0": 0.202, "loss_self_iou_0": 0.108, "cardinality_error_0": 3.719, "loss_caption_0": 3.21, "loss_caption": 3.206, "total_loss": 15.752}, "35000": {"loss_ce": 0.303, "loss_counter": 0.122, "loss_bbox": 0.074, "loss_giou": 0.201, "loss_self_iou": 0.1, "cardinality_error": 3.761, "loss_ce_0": 0.304, "loss_counter_0": 0.121, "loss_bbox_0": 0.073, "loss_giou_0": 0.202, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.761, "loss_caption_0": 3.261, "loss_caption": 3.267, "total_loss": 16.006}, "36000": {"loss_ce": 0.302, "loss_counter": 0.12, "loss_bbox": 0.074, "loss_giou": 0.202, "loss_self_iou": 0.096, "cardinality_error": 3.731, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.075, "loss_giou_0": 0.203, "loss_self_iou_0": 0.096, 
"cardinality_error_0": 3.731, "loss_caption_0": 3.322, "loss_caption": 3.322, "total_loss": 16.237}, "37000": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.193, "loss_self_iou": 0.088, "cardinality_error": 3.747, "loss_ce_0": 0.306, "loss_counter_0": 0.12, "loss_bbox_0": 0.069, "loss_giou_0": 0.195, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.747, "loss_caption_0": 3.276, "loss_caption": 3.278, "total_loss": 16.005}, "38000": {"loss_ce": 0.295, "loss_counter": 0.122, "loss_bbox": 0.073, "loss_giou": 0.198, "loss_self_iou": 0.096, "cardinality_error": 3.747, "loss_ce_0": 0.295, "loss_counter_0": 0.122, "loss_bbox_0": 0.074, "loss_giou_0": 0.199, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.747, "loss_caption_0": 3.26, "loss_caption": 3.267, "total_loss": 15.944}, "39000": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.073, "loss_giou": 0.194, "loss_self_iou": 0.096, "cardinality_error": 3.714, "loss_ce_0": 0.3, "loss_counter_0": 0.12, "loss_bbox_0": 0.074, "loss_giou_0": 0.196, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.714, "loss_caption_0": 3.29, "loss_caption": 3.284, "total_loss": 16.029}, "40000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.068, "loss_giou": 0.187, "loss_self_iou": 0.098, "cardinality_error": 3.742, "loss_ce_0": 0.302, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.189, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.742, "loss_caption_0": 3.255, "loss_caption": 3.258, "total_loss": 15.861}, "41000": {"loss_ce": 0.304, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.196, "loss_self_iou": 0.094, "cardinality_error": 3.73, "loss_ce_0": 0.303, "loss_counter_0": 0.121, "loss_bbox_0": 0.071, "loss_giou_0": 0.197, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.73, "loss_caption_0": 3.159, "loss_caption": 3.162, "total_loss": 15.549}, "42000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.072, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.698, "loss_ce_0": 0.298, "loss_counter_0": 0.116, "loss_bbox_0": 0.071, "loss_giou_0": 0.189, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.698, "loss_caption_0": 3.191, "loss_caption": 3.187, "total_loss": 15.571}, "43000": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.198, "loss_self_iou": 0.089, "cardinality_error": 3.785, "loss_ce_0": 0.306, "loss_counter_0": 0.119, "loss_bbox_0": 0.069, "loss_giou_0": 0.2, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.785, "loss_caption_0": 3.247, "loss_caption": 3.249, "total_loss": 15.93}, "44000": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.072, "loss_giou": 0.194, "loss_self_iou": 0.104, "cardinality_error": 3.727, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.072, "loss_giou_0": 0.195, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.727, "loss_caption_0": 3.228, "loss_caption": 3.227, "total_loss": 15.794}, "45000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.194, "loss_self_iou": 0.094, "cardinality_error": 3.684, "loss_ce_0": 0.304, "loss_counter_0": 0.12, "loss_bbox_0": 0.07, "loss_giou_0": 0.196, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.684, "loss_caption_0": 3.138, "loss_caption": 3.143, "total_loss": 15.458}, "46000": {"loss_ce": 0.302, "loss_counter": 0.123, "loss_bbox": 0.071, "loss_giou": 0.194, "loss_self_iou": 0.107, "cardinality_error": 3.8, "loss_ce_0": 0.301, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.196, 
"loss_self_iou_0": 0.107, "cardinality_error_0": 3.8, "loss_caption_0": 3.198, "loss_caption": 3.202, "total_loss": 15.69}, "47000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.071, "loss_giou": 0.193, "loss_self_iou": 0.1, "cardinality_error": 3.724, "loss_ce_0": 0.302, "loss_counter_0": 0.123, "loss_bbox_0": 0.072, "loss_giou_0": 0.194, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.724, "loss_caption_0": 3.166, "loss_caption": 3.167, "total_loss": 15.544}, "48000": {"loss_ce": 0.302, "loss_counter": 0.126, "loss_bbox": 0.074, "loss_giou": 0.194, "loss_self_iou": 0.1, "cardinality_error": 3.779, "loss_ce_0": 0.303, "loss_counter_0": 0.126, "loss_bbox_0": 0.073, "loss_giou_0": 0.195, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.779, "loss_caption_0": 3.197, "loss_caption": 3.204, "total_loss": 15.693}, "49000": {"loss_ce": 0.3, "loss_counter": 0.117, "loss_bbox": 0.072, "loss_giou": 0.186, "loss_self_iou": 0.103, "cardinality_error": 3.67, "loss_ce_0": 0.299, "loss_counter_0": 0.117, "loss_bbox_0": 0.073, "loss_giou_0": 0.189, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.67, "loss_caption_0": 3.197, "loss_caption": 3.193, "total_loss": 15.597}, "50000": {"loss_ce": 0.303, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.191, "loss_self_iou": 0.1, "cardinality_error": 3.769, "loss_ce_0": 0.303, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.192, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.769, "loss_caption_0": 3.195, "loss_caption": 3.196, "total_loss": 15.646}, "51000": {"loss_ce": 0.304, "loss_counter": 0.119, "loss_bbox": 0.072, "loss_giou": 0.19, "loss_self_iou": 0.1, "cardinality_error": 3.708, "loss_ce_0": 0.304, "loss_counter_0": 0.119, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.708, "loss_caption_0": 3.123, "loss_caption": 3.122, "total_loss": 15.345}, "52000": {"loss_ce": 0.302, "loss_counter": 0.122, "loss_bbox": 0.07, "loss_giou": 0.195, "loss_self_iou": 0.091, "cardinality_error": 3.787, "loss_ce_0": 0.302, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.198, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.787, "loss_caption_0": 3.08, "loss_caption": 3.08, "total_loss": 15.224}, "53000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.192, "loss_self_iou": 0.101, "cardinality_error": 3.688, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.071, "loss_giou_0": 0.194, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.688, "loss_caption_0": 3.121, "loss_caption": 3.125, "total_loss": 15.366}, "54000": {"loss_ce": 0.304, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.184, "loss_self_iou": 0.096, "cardinality_error": 3.66, "loss_ce_0": 0.303, "loss_counter_0": 0.12, "loss_bbox_0": 0.07, "loss_giou_0": 0.187, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.66, "loss_caption_0": 3.151, "loss_caption": 3.158, "total_loss": 15.44}, "55000": {"loss_ce": 0.314, "loss_counter": 0.123, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.759, "loss_ce_0": 0.314, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.188, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.759, "loss_caption_0": 3.137, "loss_caption": 3.138, "total_loss": 15.427}, "56000": {"loss_ce": 0.304, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.7, "loss_ce_0": 0.303, "loss_counter_0": 0.119, "loss_bbox_0": 0.07, "loss_giou_0": 0.189, 
"loss_self_iou_0": 0.102, "cardinality_error_0": 3.7, "loss_caption_0": 3.128, "loss_caption": 3.132, "total_loss": 15.353}, "57000": {"loss_ce": 0.308, "loss_counter": 0.125, "loss_bbox": 0.069, "loss_giou": 0.192, "loss_self_iou": 0.094, "cardinality_error": 3.833, "loss_ce_0": 0.308, "loss_counter_0": 0.125, "loss_bbox_0": 0.069, "loss_giou_0": 0.194, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.833, "loss_caption_0": 3.157, "loss_caption": 3.154, "total_loss": 15.516}, "58000": {"loss_ce": 0.3, "loss_counter": 0.116, "loss_bbox": 0.072, "loss_giou": 0.192, "loss_self_iou": 0.099, "cardinality_error": 3.724, "loss_ce_0": 0.3, "loss_counter_0": 0.116, "loss_bbox_0": 0.073, "loss_giou_0": 0.192, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.724, "loss_caption_0": 3.092, "loss_caption": 3.088, "total_loss": 15.209}, "59000": {"loss_ce": 0.305, "loss_counter": 0.126, "loss_bbox": 0.07, "loss_giou": 0.187, "loss_self_iou": 0.092, "cardinality_error": 3.806, "loss_ce_0": 0.304, "loss_counter_0": 0.126, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.806, "loss_caption_0": 3.204, "loss_caption": 3.204, "total_loss": 15.668}, "60000": {"loss_ce": 0.298, "loss_counter": 0.119, "loss_bbox": 0.073, "loss_giou": 0.197, "loss_self_iou": 0.102, "cardinality_error": 3.73, "loss_ce_0": 0.298, "loss_counter_0": 0.118, "loss_bbox_0": 0.074, "loss_giou_0": 0.198, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.73, "loss_caption_0": 3.185, "loss_caption": 3.179, "total_loss": 15.62}, "61000": {"loss_ce": 0.302, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.183, "loss_self_iou": 0.099, "cardinality_error": 3.687, "loss_ce_0": 0.303, "loss_counter_0": 0.117, "loss_bbox_0": 0.067, "loss_giou_0": 0.185, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.687, "loss_caption_0": 3.025, "loss_caption": 3.031, "total_loss": 14.914}, "62000": {"loss_ce": 0.305, "loss_counter": 0.125, "loss_bbox": 0.068, "loss_giou": 0.192, "loss_self_iou": 0.088, "cardinality_error": 3.809, "loss_ce_0": 0.304, "loss_counter_0": 0.125, "loss_bbox_0": 0.069, "loss_giou_0": 0.194, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.809, "loss_caption_0": 3.067, "loss_caption": 3.064, "total_loss": 15.147}, "63000": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.072, "loss_giou": 0.189, "loss_self_iou": 0.102, "cardinality_error": 3.636, "loss_ce_0": 0.301, "loss_counter_0": 0.113, "loss_bbox_0": 0.073, "loss_giou_0": 0.193, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.636, "loss_caption_0": 3.09, "loss_caption": 3.083, "total_loss": 15.188}, "64000": {"loss_ce": 0.308, "loss_counter": 0.12, "loss_bbox": 0.067, "loss_giou": 0.185, "loss_self_iou": 0.105, "cardinality_error": 3.738, "loss_ce_0": 0.309, "loss_counter_0": 0.12, "loss_bbox_0": 0.067, "loss_giou_0": 0.186, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.738, "loss_caption_0": 3.09, "loss_caption": 3.088, "total_loss": 15.193}, "65000": {"loss_ce": 0.302, "loss_counter": 0.123, "loss_bbox": 0.069, "loss_giou": 0.191, "loss_self_iou": 0.094, "cardinality_error": 3.735, "loss_ce_0": 0.304, "loss_counter_0": 0.123, "loss_bbox_0": 0.069, "loss_giou_0": 0.191, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.735, "loss_caption_0": 3.087, "loss_caption": 3.083, "total_loss": 15.203}, "66000": {"loss_ce": 0.307, "loss_counter": 0.121, "loss_bbox": 0.069, "loss_giou": 0.188, "loss_self_iou": 0.095, "cardinality_error": 3.753, "loss_ce_0": 0.307, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, 
"loss_giou_0": 0.19, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.753, "loss_caption_0": 3.093, "loss_caption": 3.093, "total_loss": 15.235}, "67000": {"loss_ce": 0.299, "loss_counter": 0.123, "loss_bbox": 0.071, "loss_giou": 0.189, "loss_self_iou": 0.099, "cardinality_error": 3.781, "loss_ce_0": 0.299, "loss_counter_0": 0.123, "loss_bbox_0": 0.072, "loss_giou_0": 0.192, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.781, "loss_caption_0": 3.104, "loss_caption": 3.095, "total_loss": 15.24}, "68000": {"loss_ce": 0.3, "loss_counter": 0.118, "loss_bbox": 0.073, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.702, "loss_ce_0": 0.3, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.187, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.702, "loss_caption_0": 3.092, "loss_caption": 3.087, "total_loss": 15.171}, "69000": {"loss_ce": 0.304, "loss_counter": 0.116, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.087, "cardinality_error": 3.705, "loss_ce_0": 0.303, "loss_counter_0": 0.116, "loss_bbox_0": 0.069, "loss_giou_0": 0.187, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.705, "loss_caption_0": 3.087, "loss_caption": 3.084, "total_loss": 15.154}, "70000": {"loss_ce": 0.308, "loss_counter": 0.119, "loss_bbox": 0.07, "loss_giou": 0.188, "loss_self_iou": 0.104, "cardinality_error": 3.763, "loss_ce_0": 0.309, "loss_counter_0": 0.12, "loss_bbox_0": 0.069, "loss_giou_0": 0.19, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.763, "loss_caption_0": 3.137, "loss_caption": 3.142, "total_loss": 15.421}, "71000": {"loss_ce": 0.304, "loss_counter": 0.115, "loss_bbox": 0.067, "loss_giou": 0.187, "loss_self_iou": 0.091, "cardinality_error": 3.724, "loss_ce_0": 0.304, "loss_counter_0": 0.115, "loss_bbox_0": 0.068, "loss_giou_0": 0.189, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.724, "loss_caption_0": 2.994, "loss_caption": 2.994, "total_loss": 14.812}, "72000": {"loss_ce": 0.297, "loss_counter": 0.118, "loss_bbox": 0.07, "loss_giou": 0.187, "loss_self_iou": 0.099, "cardinality_error": 3.665, "loss_ce_0": 0.296, "loss_counter_0": 0.118, "loss_bbox_0": 0.072, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.665, "loss_caption_0": 2.995, "loss_caption": 3.0, "total_loss": 14.803}, "73000": {"loss_ce": 0.301, "loss_counter": 0.122, "loss_bbox": 0.067, "loss_giou": 0.183, "loss_self_iou": 0.099, "cardinality_error": 3.762, "loss_ce_0": 0.302, "loss_counter_0": 0.122, "loss_bbox_0": 0.067, "loss_giou_0": 0.184, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.762, "loss_caption_0": 3.03, "loss_caption": 3.034, "total_loss": 14.924}, "74000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.067, "loss_giou": 0.181, "loss_self_iou": 0.093, "cardinality_error": 3.722, "loss_ce_0": 0.304, "loss_counter_0": 0.12, "loss_bbox_0": 0.068, "loss_giou_0": 0.183, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.722, "loss_caption_0": 3.061, "loss_caption": 3.062, "total_loss": 15.037}, "75000": {"loss_ce": 0.3, "loss_counter": 0.124, "loss_bbox": 0.069, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.835, "loss_ce_0": 0.302, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.19, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.835, "loss_caption_0": 3.102, "loss_caption": 3.108, "total_loss": 15.261}, "76000": {"loss_ce": 0.304, "loss_counter": 0.118, "loss_bbox": 0.069, "loss_giou": 0.19, "loss_self_iou": 0.096, "cardinality_error": 3.787, "loss_ce_0": 0.305, "loss_counter_0": 0.118, 
"loss_bbox_0": 0.069, "loss_giou_0": 0.192, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.787, "loss_caption_0": 3.055, "loss_caption": 3.056, "total_loss": 15.081}, "77000": {"loss_ce": 0.3, "loss_counter": 0.122, "loss_bbox": 0.07, "loss_giou": 0.191, "loss_self_iou": 0.101, "cardinality_error": 3.753, "loss_ce_0": 0.3, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.192, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.753, "loss_caption_0": 3.064, "loss_caption": 3.063, "total_loss": 15.105}, "78000": {"loss_ce": 0.303, "loss_counter": 0.118, "loss_bbox": 0.069, "loss_giou": 0.192, "loss_self_iou": 0.094, "cardinality_error": 3.812, "loss_ce_0": 0.302, "loss_counter_0": 0.118, "loss_bbox_0": 0.071, "loss_giou_0": 0.194, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.812, "loss_caption_0": 3.075, "loss_caption": 3.081, "total_loss": 15.186}, "79000": {"loss_ce": 0.303, "loss_counter": 0.119, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.099, "cardinality_error": 3.712, "loss_ce_0": 0.304, "loss_counter_0": 0.119, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.712, "loss_caption_0": 3.004, "loss_caption": 3.004, "total_loss": 14.833}, "80000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.099, "cardinality_error": 3.639, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.069, "loss_giou_0": 0.185, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.639, "loss_caption_0": 3.011, "loss_caption": 3.021, "total_loss": 14.846}, "81000": {"loss_ce": 0.3, "loss_counter": 0.116, "loss_bbox": 0.064, "loss_giou": 0.177, "loss_self_iou": 0.098, "cardinality_error": 3.664, "loss_ce_0": 0.3, "loss_counter_0": 0.116, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.664, "loss_caption_0": 2.972, "loss_caption": 2.974, "total_loss": 14.63}, "82000": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.067, "loss_giou": 0.179, "loss_self_iou": 0.098, "cardinality_error": 3.692, "loss_ce_0": 0.301, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.692, "loss_caption_0": 2.914, "loss_caption": 2.912, "total_loss": 14.413}, "83000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.067, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.764, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.068, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.764, "loss_caption_0": 2.939, "loss_caption": 2.933, "total_loss": 14.562}, "84000": {"loss_ce": 0.299, "loss_counter": 0.119, "loss_bbox": 0.066, "loss_giou": 0.18, "loss_self_iou": 0.086, "cardinality_error": 3.724, "loss_ce_0": 0.3, "loss_counter_0": 0.119, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.086, "cardinality_error_0": 3.724, "loss_caption_0": 2.964, "loss_caption": 2.963, "total_loss": 14.614}, "85000": {"loss_ce": 0.301, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.187, "loss_self_iou": 0.094, "cardinality_error": 3.73, "loss_ce_0": 0.301, "loss_counter_0": 0.114, "loss_bbox_0": 0.066, "loss_giou_0": 0.189, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.73, "loss_caption_0": 2.942, "loss_caption": 2.945, "total_loss": 14.596}, "86000": {"loss_ce": 0.297, "loss_counter": 0.118, "loss_bbox": 0.067, "loss_giou": 0.184, "loss_self_iou": 0.096, "cardinality_error": 3.764, "loss_ce_0": 0.298, 
"loss_counter_0": 0.118, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.764, "loss_caption_0": 2.989, "loss_caption": 2.988, "total_loss": 14.745}, "87000": {"loss_ce": 0.295, "loss_counter": 0.119, "loss_bbox": 0.067, "loss_giou": 0.178, "loss_self_iou": 0.096, "cardinality_error": 3.692, "loss_ce_0": 0.298, "loss_counter_0": 0.119, "loss_bbox_0": 0.068, "loss_giou_0": 0.182, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.692, "loss_caption_0": 2.93, "loss_caption": 2.931, "total_loss": 14.465}, "88000": {"loss_ce": 0.299, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.181, "loss_self_iou": 0.102, "cardinality_error": 3.74, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.07, "loss_giou_0": 0.184, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.74, "loss_caption_0": 2.945, "loss_caption": 2.939, "total_loss": 14.538}, "89000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.096, "cardinality_error": 3.911, "loss_ce_0": 0.303, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.188, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.911, "loss_caption_0": 2.981, "loss_caption": 2.985, "total_loss": 14.762}, "90000": {"loss_ce": 0.298, "loss_counter": 0.113, "loss_bbox": 0.066, "loss_giou": 0.174, "loss_self_iou": 0.099, "cardinality_error": 3.667, "loss_ce_0": 0.3, "loss_counter_0": 0.112, "loss_bbox_0": 0.067, "loss_giou_0": 0.177, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.667, "loss_caption_0": 2.946, "loss_caption": 2.945, "total_loss": 14.493}, "91000": {"loss_ce": 0.296, "loss_counter": 0.121, "loss_bbox": 0.066, "loss_giou": 0.179, "loss_self_iou": 0.097, "cardinality_error": 3.807, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.065, "loss_giou_0": 0.182, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.807, "loss_caption_0": 2.916, "loss_caption": 2.914, "total_loss": 14.411}, "92000": {"loss_ce": 0.298, "loss_counter": 0.121, "loss_bbox": 0.067, "loss_giou": 0.179, "loss_self_iou": 0.093, "cardinality_error": 3.784, "loss_ce_0": 0.298, "loss_counter_0": 0.121, "loss_bbox_0": 0.068, "loss_giou_0": 0.182, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.784, "loss_caption_0": 2.916, "loss_caption": 2.915, "total_loss": 14.422}, "93000": {"loss_ce": 0.298, "loss_counter": 0.117, "loss_bbox": 0.065, "loss_giou": 0.18, "loss_self_iou": 0.091, "cardinality_error": 3.806, "loss_ce_0": 0.3, "loss_counter_0": 0.117, "loss_bbox_0": 0.065, "loss_giou_0": 0.183, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.806, "loss_caption_0": 2.9, "loss_caption": 2.905, "total_loss": 14.377}, "94000": {"loss_ce": 0.293, "loss_counter": 0.109, "loss_bbox": 0.068, "loss_giou": 0.174, "loss_self_iou": 0.105, "cardinality_error": 3.616, "loss_ce_0": 0.293, "loss_counter_0": 0.109, "loss_bbox_0": 0.069, "loss_giou_0": 0.178, "loss_self_iou_0": 0.106, "cardinality_error_0": 3.616, "loss_caption_0": 2.912, "loss_caption": 2.914, "total_loss": 14.339}, "95000": {"loss_ce": 0.295, "loss_counter": 0.12, "loss_bbox": 0.066, "loss_giou": 0.185, "loss_self_iou": 0.093, "cardinality_error": 3.805, "loss_ce_0": 0.296, "loss_counter_0": 0.12, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.805, "loss_caption_0": 2.938, "loss_caption": 2.941, "total_loss": 14.546}, "96000": {"loss_ce": 0.292, "loss_counter": 0.114, "loss_bbox": 0.069, "loss_giou": 0.177, "loss_self_iou": 0.103, "cardinality_error": 
3.684, "loss_ce_0": 0.293, "loss_counter_0": 0.114, "loss_bbox_0": 0.07, "loss_giou_0": 0.181, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.684, "loss_caption_0": 2.928, "loss_caption": 2.931, "total_loss": 14.434}, "97000": {"loss_ce": 0.297, "loss_counter": 0.111, "loss_bbox": 0.066, "loss_giou": 0.184, "loss_self_iou": 0.095, "cardinality_error": 3.693, "loss_ce_0": 0.298, "loss_counter_0": 0.111, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.693, "loss_caption_0": 2.902, "loss_caption": 2.903, "total_loss": 14.392}, "98000": {"loss_ce": 0.296, "loss_counter": 0.115, "loss_bbox": 0.068, "loss_giou": 0.181, "loss_self_iou": 0.089, "cardinality_error": 3.738, "loss_ce_0": 0.298, "loss_counter_0": 0.115, "loss_bbox_0": 0.068, "loss_giou_0": 0.184, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.738, "loss_caption_0": 2.896, "loss_caption": 2.902, "total_loss": 14.361}, "99000": {"loss_ce": 0.295, "loss_counter": 0.115, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.095, "cardinality_error": 3.702, "loss_ce_0": 0.296, "loss_counter_0": 0.115, "loss_bbox_0": 0.065, "loss_giou_0": 0.177, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.702, "loss_caption_0": 2.956, "loss_caption": 2.956, "total_loss": 14.525}, "100000": {"loss_ce": 0.296, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.177, "loss_self_iou": 0.092, "cardinality_error": 3.751, "loss_ce_0": 0.298, "loss_counter_0": 0.113, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.751, "loss_caption_0": 2.932, "loss_caption": 2.932, "total_loss": 14.453}, "101000": {"loss_ce": 0.29, "loss_counter": 0.111, "loss_bbox": 0.065, "loss_giou": 0.173, "loss_self_iou": 0.093, "cardinality_error": 3.699, "loss_ce_0": 0.292, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.699, "loss_caption_0": 2.849, "loss_caption": 2.847, "total_loss": 14.064}, "102000": {"loss_ce": 0.292, "loss_counter": 0.116, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.093, "cardinality_error": 3.695, "loss_ce_0": 0.293, "loss_counter_0": 0.117, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.695, "loss_caption_0": 2.85, "loss_caption": 2.848, "total_loss": 14.087}, "103000": {"loss_ce": 0.293, "loss_counter": 0.115, "loss_bbox": 0.066, "loss_giou": 0.173, "loss_self_iou": 0.093, "cardinality_error": 3.724, "loss_ce_0": 0.293, "loss_counter_0": 0.116, "loss_bbox_0": 0.067, "loss_giou_0": 0.178, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.724, "loss_caption_0": 2.846, "loss_caption": 2.854, "total_loss": 14.092}, "104000": {"loss_ce": 0.289, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.178, "loss_self_iou": 0.097, "cardinality_error": 3.736, "loss_ce_0": 0.29, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.181, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.736, "loss_caption_0": 2.916, "loss_caption": 2.913, "total_loss": 14.362}, "105000": {"loss_ce": 0.288, "loss_counter": 0.117, "loss_bbox": 0.067, "loss_giou": 0.18, "loss_self_iou": 0.091, "cardinality_error": 3.736, "loss_ce_0": 0.29, "loss_counter_0": 0.116, "loss_bbox_0": 0.068, "loss_giou_0": 0.183, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.736, "loss_caption_0": 2.907, "loss_caption": 2.902, "total_loss": 14.342}, "106000": {"loss_ce": 0.292, "loss_counter": 0.113, "loss_bbox": 0.068, "loss_giou": 0.184, 
"loss_self_iou": 0.11, "cardinality_error": 3.775, "loss_ce_0": 0.293, "loss_counter_0": 0.112, "loss_bbox_0": 0.069, "loss_giou_0": 0.187, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.775, "loss_caption_0": 2.876, "loss_caption": 2.875, "total_loss": 14.264}, "107000": {"loss_ce": 0.291, "loss_counter": 0.114, "loss_bbox": 0.069, "loss_giou": 0.178, "loss_self_iou": 0.099, "cardinality_error": 3.743, "loss_ce_0": 0.291, "loss_counter_0": 0.114, "loss_bbox_0": 0.07, "loss_giou_0": 0.183, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.743, "loss_caption_0": 2.91, "loss_caption": 2.909, "total_loss": 14.358}, "108000": {"loss_ce": 0.295, "loss_counter": 0.118, "loss_bbox": 0.066, "loss_giou": 0.177, "loss_self_iou": 0.1, "cardinality_error": 3.81, "loss_ce_0": 0.296, "loss_counter_0": 0.117, "loss_bbox_0": 0.067, "loss_giou_0": 0.181, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.81, "loss_caption_0": 2.928, "loss_caption": 2.93, "total_loss": 14.446}, "109000": {"loss_ce": 0.294, "loss_counter": 0.118, "loss_bbox": 0.063, "loss_giou": 0.178, "loss_self_iou": 0.091, "cardinality_error": 3.78, "loss_ce_0": 0.296, "loss_counter_0": 0.117, "loss_bbox_0": 0.065, "loss_giou_0": 0.182, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.78, "loss_caption_0": 2.916, "loss_caption": 2.912, "total_loss": 14.396}, "110000": {"loss_ce": 0.297, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.178, "loss_self_iou": 0.087, "cardinality_error": 3.72, "loss_ce_0": 0.297, "loss_counter_0": 0.113, "loss_bbox_0": 0.065, "loss_giou_0": 0.184, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.72, "loss_caption_0": 2.948, "loss_caption": 2.948, "total_loss": 14.539}, "111000": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.173, "loss_self_iou": 0.095, "cardinality_error": 3.718, "loss_ce_0": 0.287, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.718, "loss_caption_0": 2.867, "loss_caption": 2.869, "total_loss": 14.14}, "112000": {"loss_ce": 0.287, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.725, "loss_ce_0": 0.289, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.725, "loss_caption_0": 2.844, "loss_caption": 2.842, "total_loss": 14.015}, "113000": {"loss_ce": 0.284, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.734, "loss_ce_0": 0.286, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.734, "loss_caption_0": 2.837, "loss_caption": 2.834, "total_loss": 13.981}, "114000": {"loss_ce": 0.283, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.096, "cardinality_error": 3.739, "loss_ce_0": 0.285, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.18, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.739, "loss_caption_0": 2.855, "loss_caption": 2.857, "total_loss": 14.084}, "115000": {"loss_ce": 0.284, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.175, "loss_self_iou": 0.092, "cardinality_error": 3.74, "loss_ce_0": 0.284, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.18, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.74, "loss_caption_0": 2.823, "loss_caption": 2.824, "total_loss": 13.959}, "116000": {"loss_ce": 0.286, "loss_counter": 0.113, "loss_bbox": 
0.065, "loss_giou": 0.177, "loss_self_iou": 0.088, "cardinality_error": 3.753, "loss_ce_0": 0.288, "loss_counter_0": 0.113, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.753, "loss_caption_0": 2.846, "loss_caption": 2.843, "total_loss": 14.073}, "117000": {"loss_ce": 0.285, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.096, "cardinality_error": 3.755, "loss_ce_0": 0.287, "loss_counter_0": 0.113, "loss_bbox_0": 0.064, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.755, "loss_caption_0": 2.804, "loss_caption": 2.81, "total_loss": 13.896}, "118000": {"loss_ce": 0.284, "loss_counter": 0.109, "loss_bbox": 0.066, "loss_giou": 0.175, "loss_self_iou": 0.093, "cardinality_error": 3.715, "loss_ce_0": 0.285, "loss_counter_0": 0.108, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.715, "loss_caption_0": 2.863, "loss_caption": 2.866, "total_loss": 14.129}, "119000": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.064, "loss_giou": 0.176, "loss_self_iou": 0.098, "cardinality_error": 3.735, "loss_ce_0": 0.287, "loss_counter_0": 0.114, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.735, "loss_caption_0": 2.844, "loss_caption": 2.843, "total_loss": 14.061}, "120000": {"loss_ce": 0.284, "loss_counter": 0.113, "loss_bbox": 0.065, "loss_giou": 0.175, "loss_self_iou": 0.101, "cardinality_error": 3.755, "loss_ce_0": 0.285, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.755, "loss_caption_0": 2.868, "loss_caption": 2.878, "total_loss": 14.168}, "121000": {"loss_ce": 0.283, "loss_counter": 0.108, "loss_bbox": 0.063, "loss_giou": 0.166, "loss_self_iou": 0.095, "cardinality_error": 3.691, "loss_ce_0": 0.284, "loss_counter_0": 0.108, "loss_bbox_0": 0.066, "loss_giou_0": 0.174, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.691, "loss_caption_0": 2.809, "loss_caption": 2.808, "total_loss": 13.835}, "122000": {"loss_ce": 0.28, "loss_counter": 0.109, "loss_bbox": 0.064, "loss_giou": 0.17, "loss_self_iou": 0.093, "cardinality_error": 3.706, "loss_ce_0": 0.281, "loss_counter_0": 0.108, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.706, "loss_caption_0": 2.811, "loss_caption": 2.814, "total_loss": 13.867}, "123000": {"loss_ce": 0.28, "loss_counter": 0.109, "loss_bbox": 0.066, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.691, "loss_ce_0": 0.281, "loss_counter_0": 0.11, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.691, "loss_caption_0": 2.789, "loss_caption": 2.797, "total_loss": 13.808}, "124000": {"loss_ce": 0.282, "loss_counter": 0.112, "loss_bbox": 0.063, "loss_giou": 0.17, "loss_self_iou": 0.092, "cardinality_error": 3.76, "loss_ce_0": 0.281, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.76, "loss_caption_0": 2.839, "loss_caption": 2.842, "total_loss": 13.984}, "125000": {"loss_ce": 0.281, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.097, "cardinality_error": 3.763, "loss_ce_0": 0.282, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.763, "loss_caption_0": 2.81, "loss_caption": 2.815, "total_loss": 13.898}, "126000": {"loss_ce": 0.282, 
"loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.177, "loss_self_iou": 0.095, "cardinality_error": 3.717, "loss_ce_0": 0.283, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.183, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.717, "loss_caption_0": 2.789, "loss_caption": 2.787, "total_loss": 13.835}, "127000": {"loss_ce": 0.277, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.764, "loss_ce_0": 0.277, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.764, "loss_caption_0": 2.867, "loss_caption": 2.871, "total_loss": 14.097}, "128000": {"loss_ce": 0.281, "loss_counter": 0.113, "loss_bbox": 0.063, "loss_giou": 0.173, "loss_self_iou": 0.092, "cardinality_error": 3.793, "loss_ce_0": 0.283, "loss_counter_0": 0.112, "loss_bbox_0": 0.064, "loss_giou_0": 0.179, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.793, "loss_caption_0": 2.868, "loss_caption": 2.863, "total_loss": 14.111}, "129000": {"loss_ce": 0.279, "loss_counter": 0.106, "loss_bbox": 0.066, "loss_giou": 0.175, "loss_self_iou": 0.1, "cardinality_error": 3.686, "loss_ce_0": 0.283, "loss_counter_0": 0.105, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.686, "loss_caption_0": 2.812, "loss_caption": 2.813, "total_loss": 13.903}, "130000": {"loss_ce": 0.283, "loss_counter": 0.111, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.097, "cardinality_error": 3.772, "loss_ce_0": 0.286, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.772, "loss_caption_0": 2.86, "loss_caption": 2.861, "total_loss": 14.105}, "131000": {"loss_ce": 0.277, "loss_counter": 0.107, "loss_bbox": 0.062, "loss_giou": 0.17, "loss_self_iou": 0.092, "cardinality_error": 3.75, "loss_ce_0": 0.279, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.75, "loss_caption_0": 2.817, "loss_caption": 2.826, "total_loss": 13.897}, "132000": {"loss_ce": 0.271, "loss_counter": 0.109, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.089, "cardinality_error": 3.814, "loss_ce_0": 0.274, "loss_counter_0": 0.109, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.814, "loss_caption_0": 2.778, "loss_caption": 2.776, "total_loss": 13.726}, "133000": {"loss_ce": 0.277, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.095, "cardinality_error": 3.773, "loss_ce_0": 0.277, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.773, "loss_caption_0": 2.843, "loss_caption": 2.843, "total_loss": 13.999}, "134000": {"loss_ce": 0.273, "loss_counter": 0.108, "loss_bbox": 0.065, "loss_giou": 0.171, "loss_self_iou": 0.101, "cardinality_error": 3.743, "loss_ce_0": 0.276, "loss_counter_0": 0.107, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.743, "loss_caption_0": 2.786, "loss_caption": 2.787, "total_loss": 13.756}, "135000": {"loss_ce": 0.28, "loss_counter": 0.115, "loss_bbox": 0.061, "loss_giou": 0.168, "loss_self_iou": 0.096, "cardinality_error": 3.794, "loss_ce_0": 0.281, "loss_counter_0": 0.115, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.794, "loss_caption_0": 2.785, "loss_caption": 2.784, "total_loss": 
13.759}, "136000": {"loss_ce": 0.279, "loss_counter": 0.106, "loss_bbox": 0.065, "loss_giou": 0.168, "loss_self_iou": 0.092, "cardinality_error": 3.653, "loss_ce_0": 0.279, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.175, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.653, "loss_caption_0": 2.828, "loss_caption": 2.834, "total_loss": 13.919}, "137000": {"loss_ce": 0.279, "loss_counter": 0.105, "loss_bbox": 0.065, "loss_giou": 0.173, "loss_self_iou": 0.099, "cardinality_error": 3.654, "loss_ce_0": 0.281, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.654, "loss_caption_0": 2.79, "loss_caption": 2.799, "total_loss": 13.806}, "138000": {"loss_ce": 0.278, "loss_counter": 0.109, "loss_bbox": 0.064, "loss_giou": 0.171, "loss_self_iou": 0.095, "cardinality_error": 3.714, "loss_ce_0": 0.28, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.714, "loss_caption_0": 2.835, "loss_caption": 2.828, "total_loss": 13.945}, "139000": {"loss_ce": 0.281, "loss_counter": 0.115, "loss_bbox": 0.062, "loss_giou": 0.167, "loss_self_iou": 0.098, "cardinality_error": 3.813, "loss_ce_0": 0.283, "loss_counter_0": 0.114, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.813, "loss_caption_0": 2.83, "loss_caption": 2.828, "total_loss": 13.924}, "140000": {"loss_ce": 0.277, "loss_counter": 0.107, "loss_bbox": 0.063, "loss_giou": 0.171, "loss_self_iou": 0.09, "cardinality_error": 3.664, "loss_ce_0": 0.28, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.664, "loss_caption_0": 2.821, "loss_caption": 2.823, "total_loss": 13.905}, "141000": {"loss_ce": 0.268, "loss_counter": 0.108, "loss_bbox": 0.066, "loss_giou": 0.171, "loss_self_iou": 0.106, "cardinality_error": 3.774, "loss_ce_0": 0.27, "loss_counter_0": 0.108, "loss_bbox_0": 0.067, "loss_giou_0": 0.177, "loss_self_iou_0": 0.108, "cardinality_error_0": 3.774, "loss_caption_0": 2.75, "loss_caption": 2.748, "total_loss": 13.572}, "142000": {"loss_ce": 0.27, "loss_counter": 0.109, "loss_bbox": 0.062, "loss_giou": 0.173, "loss_self_iou": 0.091, "cardinality_error": 3.797, "loss_ce_0": 0.272, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.181, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.797, "loss_caption_0": 2.72, "loss_caption": 2.722, "total_loss": 13.492}, "143000": {"loss_ce": 0.265, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.162, "loss_self_iou": 0.095, "cardinality_error": 3.637, "loss_ce_0": 0.268, "loss_counter_0": 0.1, "loss_bbox_0": 0.066, "loss_giou_0": 0.171, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.637, "loss_caption_0": 2.782, "loss_caption": 2.782, "total_loss": 13.626}, "144000": {"loss_ce": 0.27, "loss_counter": 0.112, "loss_bbox": 0.062, "loss_giou": 0.172, "loss_self_iou": 0.094, "cardinality_error": 3.831, "loss_ce_0": 0.273, "loss_counter_0": 0.112, "loss_bbox_0": 0.064, "loss_giou_0": 0.18, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.831, "loss_caption_0": 2.793, "loss_caption": 2.79, "total_loss": 13.773}, "145000": {"loss_ce": 0.269, "loss_counter": 0.101, "loss_bbox": 0.061, "loss_giou": 0.16, "loss_self_iou": 0.093, "cardinality_error": 3.665, "loss_ce_0": 0.273, "loss_counter_0": 0.101, "loss_bbox_0": 0.063, "loss_giou_0": 0.168, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.665, "loss_caption_0": 2.762, 
"loss_caption": 2.767, "total_loss": 13.554}, "146000": {"loss_ce": 0.275, "loss_counter": 0.109, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.091, "cardinality_error": 3.725, "loss_ce_0": 0.276, "loss_counter_0": 0.109, "loss_bbox_0": 0.064, "loss_giou_0": 0.172, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.725, "loss_caption_0": 2.813, "loss_caption": 2.813, "total_loss": 13.811}, "147000": {"loss_ce": 0.272, "loss_counter": 0.104, "loss_bbox": 0.063, "loss_giou": 0.171, "loss_self_iou": 0.097, "cardinality_error": 3.714, "loss_ce_0": 0.273, "loss_counter_0": 0.103, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.714, "loss_caption_0": 2.747, "loss_caption": 2.745, "total_loss": 13.578}, "148000": {"loss_ce": 0.271, "loss_counter": 0.108, "loss_bbox": 0.063, "loss_giou": 0.168, "loss_self_iou": 0.096, "cardinality_error": 3.728, "loss_ce_0": 0.274, "loss_counter_0": 0.107, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.728, "loss_caption_0": 2.843, "loss_caption": 2.84, "total_loss": 13.944}, "149000": {"loss_ce": 0.269, "loss_counter": 0.108, "loss_bbox": 0.066, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.799, "loss_ce_0": 0.273, "loss_counter_0": 0.109, "loss_bbox_0": 0.068, "loss_giou_0": 0.178, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.799, "loss_caption_0": 2.836, "loss_caption": 2.836, "total_loss": 13.926}, "150000": {"loss_ce": 0.27, "loss_counter": 0.107, "loss_bbox": 0.063, "loss_giou": 0.169, "loss_self_iou": 0.087, "cardinality_error": 3.703, "loss_ce_0": 0.272, "loss_counter_0": 0.107, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.703, "loss_caption_0": 2.806, "loss_caption": 2.806, "total_loss": 13.795}, "151000": {"loss_ce": 0.264, "loss_counter": 0.101, "loss_bbox": 0.063, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.645, "loss_ce_0": 0.266, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.171, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.645, "loss_caption_0": 2.762, "loss_caption": 2.759, "total_loss": 13.537}, "152000": {"loss_ce": 0.265, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.166, "loss_self_iou": 0.087, "cardinality_error": 3.722, "loss_ce_0": 0.269, "loss_counter_0": 0.103, "loss_bbox_0": 0.063, "loss_giou_0": 0.175, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.722, "loss_caption_0": 2.762, "loss_caption": 2.766, "total_loss": 13.59}, "153000": {"loss_ce": 0.264, "loss_counter": 0.111, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.083, "cardinality_error": 3.813, "loss_ce_0": 0.267, "loss_counter_0": 0.111, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.085, "cardinality_error_0": 3.813, "loss_caption_0": 2.777, "loss_caption": 2.778, "total_loss": 13.663}, "154000": {"loss_ce": 0.268, "loss_counter": 0.106, "loss_bbox": 0.061, "loss_giou": 0.168, "loss_self_iou": 0.092, "cardinality_error": 3.769, "loss_ce_0": 0.272, "loss_counter_0": 0.105, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.769, "loss_caption_0": 2.787, "loss_caption": 2.787, "total_loss": 13.717}, "155000": {"loss_ce": 0.264, "loss_counter": 0.104, "loss_bbox": 0.063, "loss_giou": 0.169, "loss_self_iou": 0.09, "cardinality_error": 3.714, "loss_ce_0": 0.267, "loss_counter_0": 0.104, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.091, 
"cardinality_error_0": 3.714, "loss_caption_0": 2.758, "loss_caption": 2.76, "total_loss": 13.593}, "156000": {"loss_ce": 0.265, "loss_counter": 0.106, "loss_bbox": 0.064, "loss_giou": 0.167, "loss_self_iou": 0.102, "cardinality_error": 3.675, "loss_ce_0": 0.269, "loss_counter_0": 0.106, "loss_bbox_0": 0.066, "loss_giou_0": 0.174, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.675, "loss_caption_0": 2.741, "loss_caption": 2.742, "total_loss": 13.504}, "157000": {"loss_ce": 0.267, "loss_counter": 0.104, "loss_bbox": 0.065, "loss_giou": 0.167, "loss_self_iou": 0.103, "cardinality_error": 3.722, "loss_ce_0": 0.268, "loss_counter_0": 0.104, "loss_bbox_0": 0.068, "loss_giou_0": 0.176, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.722, "loss_caption_0": 2.777, "loss_caption": 2.783, "total_loss": 13.668}, "158000": {"loss_ce": 0.266, "loss_counter": 0.106, "loss_bbox": 0.062, "loss_giou": 0.164, "loss_self_iou": 0.099, "cardinality_error": 3.758, "loss_ce_0": 0.27, "loss_counter_0": 0.106, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.758, "loss_caption_0": 2.815, "loss_caption": 2.817, "total_loss": 13.789}, "159000": {"loss_ce": 0.272, "loss_counter": 0.108, "loss_bbox": 0.062, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.729, "loss_ce_0": 0.275, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.177, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.729, "loss_caption_0": 2.783, "loss_caption": 2.785, "total_loss": 13.721}, "160000": {"loss_ce": 0.269, "loss_counter": 0.109, "loss_bbox": 0.063, "loss_giou": 0.166, "loss_self_iou": 0.098, "cardinality_error": 3.816, "loss_ce_0": 0.271, "loss_counter_0": 0.109, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.816, "loss_caption_0": 2.78, "loss_caption": 2.784, "total_loss": 13.686}, "161000": {"loss_ce": 0.26, "loss_counter": 0.103, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.695, "loss_ce_0": 0.263, "loss_counter_0": 0.103, "loss_bbox_0": 0.063, "loss_giou_0": 0.171, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.695, "loss_caption_0": 2.766, "loss_caption": 2.768, "total_loss": 13.553}, "162000": {"loss_ce": 0.262, "loss_counter": 0.103, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.091, "cardinality_error": 3.694, "loss_ce_0": 0.266, "loss_counter_0": 0.103, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.694, "loss_caption_0": 2.768, "loss_caption": 2.764, "total_loss": 13.573}, "163000": {"loss_ce": 0.262, "loss_counter": 0.105, "loss_bbox": 0.064, "loss_giou": 0.173, "loss_self_iou": 0.097, "cardinality_error": 3.769, "loss_ce_0": 0.266, "loss_counter_0": 0.104, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.769, "loss_caption_0": 2.765, "loss_caption": 2.766, "total_loss": 13.63}, "164000": {"loss_ce": 0.265, "loss_counter": 0.11, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.092, "cardinality_error": 3.774, "loss_ce_0": 0.269, "loss_counter_0": 0.11, "loss_bbox_0": 0.063, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.774, "loss_caption_0": 2.772, "loss_caption": 2.776, "total_loss": 13.625}, "165000": {"loss_ce": 0.264, "loss_counter": 0.102, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.092, "cardinality_error": 3.699, "loss_ce_0": 0.267, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, 
"loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.699, "loss_caption_0": 2.711, "loss_caption": 2.716, "total_loss": 13.368}, "166000": {"loss_ce": 0.264, "loss_counter": 0.105, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.094, "cardinality_error": 3.72, "loss_ce_0": 0.268, "loss_counter_0": 0.105, "loss_bbox_0": 0.063, "loss_giou_0": 0.174, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.72, "loss_caption_0": 2.754, "loss_caption": 2.755, "total_loss": 13.534}, "167000": {"loss_ce": 0.261, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.095, "cardinality_error": 3.712, "loss_ce_0": 0.266, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.712, "loss_caption_0": 2.771, "loss_caption": 2.772, "total_loss": 13.617}, "168000": {"loss_ce": 0.265, "loss_counter": 0.108, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.09, "cardinality_error": 3.816, "loss_ce_0": 0.269, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.816, "loss_caption_0": 2.814, "loss_caption": 2.82, "total_loss": 13.826}, "169000": {"loss_ce": 0.258, "loss_counter": 0.106, "loss_bbox": 0.064, "loss_giou": 0.166, "loss_self_iou": 0.106, "cardinality_error": 3.697, "loss_ce_0": 0.261, "loss_counter_0": 0.106, "loss_bbox_0": 0.067, "loss_giou_0": 0.176, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.697, "loss_caption_0": 2.769, "loss_caption": 2.775, "total_loss": 13.598}, "170000": {"loss_ce": 0.268, "loss_counter": 0.105, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.093, "cardinality_error": 3.799, "loss_ce_0": 0.272, "loss_counter_0": 0.106, "loss_bbox_0": 0.064, "loss_giou_0": 0.174, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.799, "loss_caption_0": 2.794, "loss_caption": 2.798, "total_loss": 13.727}, "171000": {"loss_ce": 0.256, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.161, "loss_self_iou": 0.094, "cardinality_error": 3.694, "loss_ce_0": 0.261, "loss_counter_0": 0.101, "loss_bbox_0": 0.064, "loss_giou_0": 0.169, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.694, "loss_caption_0": 2.772, "loss_caption": 2.77, "total_loss": 13.544}, "172000": {"loss_ce": 0.258, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.165, "loss_self_iou": 0.096, "cardinality_error": 3.667, "loss_ce_0": 0.262, "loss_counter_0": 0.1, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.667, "loss_caption_0": 2.741, "loss_caption": 2.743, "total_loss": 13.47}, "173000": {"loss_ce": 0.258, "loss_counter": 0.104, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.09, "cardinality_error": 3.753, "loss_ce_0": 0.261, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.753, "loss_caption_0": 2.786, "loss_caption": 2.785, "total_loss": 13.646}, "174000": {"loss_ce": 0.259, "loss_counter": 0.107, "loss_bbox": 0.06, "loss_giou": 0.166, "loss_self_iou": 0.094, "cardinality_error": 3.832, "loss_ce_0": 0.261, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.832, "loss_caption_0": 2.733, "loss_caption": 2.738, "total_loss": 13.457}, "175000": {"loss_ce": 0.255, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.163, "loss_self_iou": 0.098, "cardinality_error": 3.731, "loss_ce_0": 0.259, 
"loss_counter_0": 0.103, "loss_bbox_0": 0.062, "loss_giou_0": 0.173, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.731, "loss_caption_0": 2.745, "loss_caption": 2.744, "total_loss": 13.454}, "176000": {"loss_ce": 0.261, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.095, "cardinality_error": 3.795, "loss_ce_0": 0.264, "loss_counter_0": 0.103, "loss_bbox_0": 0.064, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.795, "loss_caption_0": 2.761, "loss_caption": 2.77, "total_loss": 13.575}, "177000": {"loss_ce": 0.255, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.161, "loss_self_iou": 0.096, "cardinality_error": 3.652, "loss_ce_0": 0.261, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.169, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.652, "loss_caption_0": 2.743, "loss_caption": 2.745, "total_loss": 13.43}, "178000": {"loss_ce": 0.255, "loss_counter": 0.103, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.103, "cardinality_error": 3.664, "loss_ce_0": 0.26, "loss_counter_0": 0.102, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.664, "loss_caption_0": 2.682, "loss_caption": 2.68, "total_loss": 13.211}, "179000": {"loss_ce": 0.261, "loss_counter": 0.105, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.09, "cardinality_error": 3.825, "loss_ce_0": 0.266, "loss_counter_0": 0.104, "loss_bbox_0": 0.063, "loss_giou_0": 0.173, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.825, "loss_caption_0": 2.788, "loss_caption": 2.796, "total_loss": 13.671}, "180000": {"loss_ce": 0.255, "loss_counter": 0.102, "loss_bbox": 0.064, "loss_giou": 0.166, "loss_self_iou": 0.093, "cardinality_error": 3.729, "loss_ce_0": 0.261, "loss_counter_0": 0.102, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.729, "loss_caption_0": 2.781, "loss_caption": 2.775, "total_loss": 13.608}, "181000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.094, "cardinality_error": 3.781, "loss_ce_0": 0.261, "loss_counter_0": 0.102, "loss_bbox_0": 0.063, "loss_giou_0": 0.172, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.781, "loss_caption_0": 2.743, "loss_caption": 2.746, "total_loss": 13.452}, "182000": {"loss_ce": 0.255, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.164, "loss_self_iou": 0.1, "cardinality_error": 3.726, "loss_ce_0": 0.26, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.726, "loss_caption_0": 2.748, "loss_caption": 2.746, "total_loss": 13.472}, "183000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.722, "loss_ce_0": 0.26, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.722, "loss_caption_0": 2.729, "loss_caption": 2.734, "total_loss": 13.405}, "184000": {"loss_ce": 0.253, "loss_counter": 0.104, "loss_bbox": 0.061, "loss_giou": 0.161, "loss_self_iou": 0.098, "cardinality_error": 3.726, "loss_ce_0": 0.257, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.17, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.726, "loss_caption_0": 2.783, "loss_caption": 2.787, "total_loss": 13.591}, "185000": {"loss_ce": 0.255, "loss_counter": 0.098, "loss_bbox": 0.063, "loss_giou": 0.165, "loss_self_iou": 0.087, "cardinality_error": 
3.667, "loss_ce_0": 0.26, "loss_counter_0": 0.098, "loss_bbox_0": 0.065, "loss_giou_0": 0.175, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.667, "loss_caption_0": 2.718, "loss_caption": 2.716, "total_loss": 13.354}, "186000": {"loss_ce": 0.254, "loss_counter": 0.099, "loss_bbox": 0.062, "loss_giou": 0.166, "loss_self_iou": 0.093, "cardinality_error": 3.776, "loss_ce_0": 0.259, "loss_counter_0": 0.099, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.776, "loss_caption_0": 2.75, "loss_caption": 2.75, "total_loss": 13.494}, "187000": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.089, "cardinality_error": 3.803, "loss_ce_0": 0.264, "loss_counter_0": 0.109, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.803, "loss_caption_0": 2.788, "loss_caption": 2.791, "total_loss": 13.678}, "188000": {"loss_ce": 0.253, "loss_counter": 0.1, "loss_bbox": 0.062, "loss_giou": 0.163, "loss_self_iou": 0.091, "cardinality_error": 3.71, "loss_ce_0": 0.259, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.71, "loss_caption_0": 2.745, "loss_caption": 2.743, "total_loss": 13.444}, "189000": {"loss_ce": 0.25, "loss_counter": 0.105, "loss_bbox": 0.064, "loss_giou": 0.165, "loss_self_iou": 0.1, "cardinality_error": 3.748, "loss_ce_0": 0.256, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.175, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.748, "loss_caption_0": 2.751, "loss_caption": 2.753, "total_loss": 13.484}, "190000": {"loss_ce": 0.257, "loss_counter": 0.104, "loss_bbox": 0.06, "loss_giou": 0.161, "loss_self_iou": 0.098, "cardinality_error": 3.742, "loss_ce_0": 0.264, "loss_counter_0": 0.104, "loss_bbox_0": 0.063, "loss_giou_0": 0.172, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.742, "loss_caption_0": 2.729, "loss_caption": 2.73, "total_loss": 13.395}, "191000": {"loss_ce": 0.251, "loss_counter": 0.099, "loss_bbox": 0.062, "loss_giou": 0.167, "loss_self_iou": 0.086, "cardinality_error": 3.653, "loss_ce_0": 0.257, "loss_counter_0": 0.099, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.653, "loss_caption_0": 2.754, "loss_caption": 2.752, "total_loss": 13.501}, "192000": {"loss_ce": 0.252, "loss_counter": 0.1, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.094, "cardinality_error": 3.767, "loss_ce_0": 0.258, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.767, "loss_caption_0": 2.717, "loss_caption": 2.72, "total_loss": 13.343}, "193000": {"loss_ce": 0.25, "loss_counter": 0.106, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.093, "cardinality_error": 3.847, "loss_ce_0": 0.256, "loss_counter_0": 0.105, "loss_bbox_0": 0.063, "loss_giou_0": 0.174, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.847, "loss_caption_0": 2.754, "loss_caption": 2.759, "total_loss": 13.499}, "194000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.165, "loss_self_iou": 0.097, "cardinality_error": 3.775, "loss_ce_0": 0.262, "loss_counter_0": 0.102, "loss_bbox_0": 0.063, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.775, "loss_caption_0": 2.769, "loss_caption": 2.772, "total_loss": 13.587}, "195000": {"loss_ce": 0.257, "loss_counter": 0.106, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 
0.089, "cardinality_error": 3.794, "loss_ce_0": 0.261, "loss_counter_0": 0.105, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.794, "loss_caption_0": 2.751, "loss_caption": 2.751, "total_loss": 13.506}, "196000": {"loss_ce": 0.251, "loss_counter": 0.095, "loss_bbox": 0.061, "loss_giou": 0.162, "loss_self_iou": 0.1, "cardinality_error": 3.652, "loss_ce_0": 0.258, "loss_counter_0": 0.095, "loss_bbox_0": 0.064, "loss_giou_0": 0.172, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.652, "loss_caption_0": 2.743, "loss_caption": 2.735, "total_loss": 13.403}, "197000": {"loss_ce": 0.251, "loss_counter": 0.104, "loss_bbox": 0.061, "loss_giou": 0.162, "loss_self_iou": 0.091, "cardinality_error": 3.759, "loss_ce_0": 0.258, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.171, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.759, "loss_caption_0": 2.74, "loss_caption": 2.743, "total_loss": 13.418}, "198000": {"loss_ce": 0.249, "loss_counter": 0.098, "loss_bbox": 0.062, "loss_giou": 0.162, "loss_self_iou": 0.092, "cardinality_error": 3.664, "loss_ce_0": 0.255, "loss_counter_0": 0.098, "loss_bbox_0": 0.064, "loss_giou_0": 0.171, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.664, "loss_caption_0": 2.718, "loss_caption": 2.72, "total_loss": 13.31}, "199000": {"loss_ce": 0.252, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.162, "loss_self_iou": 0.101, "cardinality_error": 3.736, "loss_ce_0": 0.257, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.736, "loss_caption_0": 2.759, "loss_caption": 2.76, "total_loss": 13.502}, "200000": {"loss_ce": 0.253, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.159, "loss_self_iou": 0.098, "cardinality_error": 3.701, "loss_ce_0": 0.259, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.17, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.701, "loss_caption_0": 2.766, "loss_caption": 2.771, "total_loss": 13.518}}, "lr_history": {"1000": 5e-05, "2000": 5e-05, "3000": 5e-05, "4000": 5e-05, "5000": 5e-05, "6000": 5e-05, "7000": 5e-05, "8000": 5e-05, "9000": 5e-05, "10000": 5e-05, "11000": 5e-05, "12000": 5e-05, "13000": 5e-05, "14000": 5e-05, "15000": 5e-05, "16000": 5e-05, "17000": 5e-05, "18000": 5e-05, "19000": 5e-05, "20000": 5e-05, "21000": 5e-05, "22000": 5e-05, "23000": 5e-05, "24000": 5e-05, "25000": 5e-05, "26000": 5e-05, "27000": 5e-05, "28000": 5e-05, "29000": 5e-05, "30000": 5e-05, "31000": 5e-05, "32000": 5e-05, "33000": 5e-05, "34000": 5e-05, "35000": 5e-05, "36000": 5e-05, "37000": 5e-05, "38000": 5e-05, "39000": 5e-05, "40000": 5e-05, "41000": 5e-05, "42000": 5e-05, "43000": 5e-05, "44000": 5e-05, "45000": 5e-05, "46000": 5e-05, "47000": 5e-05, "48000": 5e-05, "49000": 5e-05, "50000": 5e-05, "51000": 5e-05, "52000": 5e-05, "53000": 5e-05, "54000": 5e-05, "55000": 5e-05, "56000": 5e-05, "57000": 5e-05, "58000": 5e-05, "59000": 5e-05, "60000": 5e-05, "61000": 5e-05, "62000": 5e-05, "63000": 5e-05, "64000": 5e-05, "65000": 5e-05, "66000": 5e-05, "67000": 5e-05, "68000": 5e-05, "69000": 5e-05, "70000": 5e-05, "71000": 5e-05, "72000": 5e-05, "73000": 5e-05, "74000": 5e-05, "75000": 5e-05, "76000": 5e-05, "77000": 5e-05, "78000": 5e-05, "79000": 5e-05, "80000": 5e-05, "81000": 2.5e-05, "82000": 2.5e-05, "83000": 2.5e-05, "84000": 2.5e-05, "85000": 2.5e-05, "86000": 2.5e-05, "87000": 2.5e-05, "88000": 2.5e-05, "89000": 2.5e-05, "90000": 2.5e-05, "91000": 2.5e-05, "92000": 2.5e-05, 
"93000": 2.5e-05, "94000": 2.5e-05, "95000": 2.5e-05, "96000": 2.5e-05, "97000": 2.5e-05, "98000": 2.5e-05, "99000": 2.5e-05, "100000": 2.5e-05, "101000": 2.5e-05, "102000": 2.5e-05, "103000": 2.5e-05, "104000": 2.5e-05, "105000": 2.5e-05, "106000": 2.5e-05, "107000": 2.5e-05, "108000": 2.5e-05, "109000": 2.5e-05, "110000": 2.5e-05, "111000": 1.25e-05, "112000": 1.25e-05, "113000": 1.25e-05, "114000": 1.25e-05, "115000": 1.25e-05, "116000": 1.25e-05, "117000": 1.25e-05, "118000": 1.25e-05, "119000": 1.25e-05, "120000": 1.25e-05, "121000": 1.25e-05, "122000": 1.25e-05, "123000": 1.25e-05, "124000": 1.25e-05, "125000": 1.25e-05, "126000": 1.25e-05, "127000": 1.25e-05, "128000": 1.25e-05, "129000": 1.25e-05, "130000": 1.25e-05, "131000": 1.25e-05, "132000": 1.25e-05, "133000": 1.25e-05, "134000": 1.25e-05, "135000": 1.25e-05, "136000": 1.25e-05, "137000": 1.25e-05, "138000": 1.25e-05, "139000": 1.25e-05, "140000": 1.25e-05, "141000": 6.25e-06, "142000": 6.25e-06, "143000": 6.25e-06, "144000": 6.25e-06, "145000": 6.25e-06, "146000": 6.25e-06, "147000": 6.25e-06, "148000": 6.25e-06, "149000": 6.25e-06, "150000": 6.25e-06, "151000": 6.25e-06, "152000": 6.25e-06, "153000": 6.25e-06, "154000": 6.25e-06, "155000": 6.25e-06, "156000": 6.25e-06, "157000": 6.25e-06, "158000": 6.25e-06, "159000": 6.25e-06, "160000": 6.25e-06, "161000": 6.25e-06, "162000": 6.25e-06, "163000": 6.25e-06, "164000": 6.25e-06, "165000": 6.25e-06, "166000": 6.25e-06, "167000": 6.25e-06, "168000": 6.25e-06, "169000": 6.25e-06, "170000": 6.25e-06, "171000": 3.125e-06, "172000": 3.125e-06, "173000": 3.125e-06, "174000": 3.125e-06, "175000": 3.125e-06, "176000": 3.125e-06, "177000": 3.125e-06, "178000": 3.125e-06, "179000": 3.125e-06, "180000": 3.125e-06, "181000": 3.125e-06, "182000": 3.125e-06, "183000": 3.125e-06, "184000": 3.125e-06, "185000": 3.125e-06, "186000": 3.125e-06, "187000": 3.125e-06, "188000": 3.125e-06, "189000": 3.125e-06, "190000": 3.125e-06, "191000": 3.125e-06, "192000": 3.125e-06, "193000": 3.125e-06, "194000": 3.125e-06, "195000": 3.125e-06, "196000": 3.125e-06, "197000": 3.125e-06, "198000": 3.125e-06, "199000": 3.125e-06, "200000": 3.125e-06}}, "eval_history": {}} \ No newline at end of file diff --git a/anet_clip/model-best.pth b/anet_clip/model-best.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e4a90dfdd7212286f1fa03c604791bc873dd013 --- /dev/null +++ b/anet_clip/model-best.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a1005b39a23d6a3c9bb047e210d4dff71adb571bba0b1e25498705a7d7b56c +size 397662145 diff --git a/anet_clip/model-last.pth b/anet_clip/model-last.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e4a90dfdd7212286f1fa03c604791bc873dd013 --- /dev/null +++ b/anet_clip/model-last.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a1005b39a23d6a3c9bb047e210d4dff71adb571bba0b1e25498705a7d7b56c +size 397662145 diff --git a/anet_clip/tf_summary/events.out.tfevents.1710744132.dlc1fj0sg6kl2yx3-master-0 b/anet_clip/tf_summary/events.out.tfevents.1710744132.dlc1fj0sg6kl2yx3-master-0 new file mode 100644 index 0000000000000000000000000000000000000000..07e67491b1b155acb14936e9a4a779d873fa5fd9 --- /dev/null +++ b/anet_clip/tf_summary/events.out.tfevents.1710744132.dlc1fj0sg6kl2yx3-master-0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9041922000c6e059adc92858535f91dd695cc35eb5dae7c5d5d47360108999 +size 180967 diff --git a/anet_clip/train.log b/anet_clip/train.log new file 
mode 100644 index 0000000000000000000000000000000000000000..765923c87c550a2ac7c30e84294d23b156013f81 --- /dev/null +++ b/anet_clip/train.log @@ -0,0 +1,1520 @@ +backup environment completed ! +Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal + + +******************** All args: ************************************************* +align_contiguous = False +align_drop_z = 0 +align_keep_percentile = 0.1 +align_many_to_one = False +align_one_to_many = False +align_top_band_size = 0 +att_hid_size = 512 +aux_loss = True +backbone = None +base_cfg_path = cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml +basic_ss_prob = 0 +batch_size = 1 +batch_size_for_eval = 1 +bbox_loss_coef = 0 +beta = 1 +cap_dec_n_points = 4 +cap_nheads = 1 +cap_num_feature_levels = 4 +cap_prob_clip = False +caption_cost_type = loss +caption_decoder_type = standard +caption_loss_coef = 2 +cfg_path = cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +cl_schedule_time = [0, 2] +cl_schedule_val = [0, 0.1] +clip_context_dim = 512 +cls_loss_coef = 2 +contrastive_hidden_size = 128 +contrastive_loss_start_coef = 0.0 +contrastive_loss_temperature = 0.1 +cost_alpha = 0.25 +cost_gamma = 2 +count_loss_coef = 0.5 +criteria_for_best_ckpt = overall +current_lr = 5e-05 +data_norm = 0 +data_rescale = 1 +debug = False +dec_layers = 2 +dec_n_points = 4 +device = cuda +dict_file = data/howto/vocabulary_howto_rate2_anet.json +dict_file_val = data/howto/vocabulary_howto_rate2_anet.json +dilation = False +disable_contrastive_projection = 1 +disable_cudnn = 0 +disable_mid_caption_heads = False +disable_rematch = False +disable_tqdm = False +drop_prob = 0.5 +ec_alpha = 1.0 +enable_bg_for_cl = True +enable_contrastive = False +enable_cross_video_cl = True +enable_e2t_cl = True +enc_layers = 2 +enc_n_points = 4 +eos_coef = 0.1 +epoch = 20 +eval_proposal_file = data/generated_proposals/dbg_trainval_top100.json +event_context_dim = None +feature_dim = 768 +feature_sample_rate = 1 +fix_xcw = 1 +focal_alpha = 0.25 +focal_gamma = 2.0 +focal_mil = False +frame_embedding_num = 100 +ft_gt_percent = 1.0 +giou_loss_coef = 4 +gpu_id = [] +grad_clip = 100.0 +gt_file_for_auc = data/anet/captiondata/val_all.json +gt_file_for_eval = ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval = ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +gt_proposal_sample_num = 20 +hidden_dim = 512 +hidden_dropout_prob = 0.5 +huggingface_cache_dir = .cache +id = seq2-ft(mix)-gt_percent-1.0 +id_ori = +input_encoding_size = 512 +invalid_video_json = [] +iteration = 3 +layer_norm_eps = 1e-12 +learning_rate_decay_every = 3 +learning_rate_decay_rate = 0.5 +learning_rate_decay_start = 8 +lloss_beta = 1 +lloss_cross_entropy = 0 +lloss_focal_loss = 0 +lloss_gau_mask = 1 +lr = 5e-05 +lr_backbone = 2e-05 +lr_backbone_names = ['None'] +lr_linear_proj_mult = 0.1 +lr_linear_proj_names = ['reference_points', 'sampling_offsets'] +lr_proj = 0 +map = True +matcher_type = default +max_caption_len = 50 +max_eseq_length = 10 +max_pos_num = 500 +max_text_input_len = 32 +merge_criterion = ins_cap_topk +merge_k_boxes = 3 +merge_mode = weighted_sum +mil_loss_coef = 0 +min_epoch_when_save = -1 +nheads = 8 +norm_ins_score = sigmoid +nthreads = 4 +num_classes = 1 
+num_feature_levels = 4 +num_layers = 1 +num_neg_box = 10 +num_queries = 100 +optimizer_type = adam +position_embedding = sine +position_embedding_scale = 6.283185307179586 +pre_percent = 1.0 +pretrain = None +pretrain_path = +pretrained_language_model = CLIP +prior_anchor_duration_init = True +prior_manner = all +pseudo_box_aug = False +pseudo_box_aug_mode = random_range +pseudo_box_aug_num = 8 +pseudo_box_aug_ratio = 0.02 +pseudo_box_type = similarity_op_order_v2 +random_anchor_init = True +random_seed = False +ref_rank_loss_coef = 0.0 +refine_pseudo_box = False +refine_pseudo_stage_num = 2 +rnn_size = 512 +sample_method = nearest +save_all_checkpoint = 0 +save_checkpoint_every = 1 +save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs +scheduled_sampling_increase_every = 2 +scheduled_sampling_increase_prob = 0.05 +scheduled_sampling_max_prob = 0.25 +scheduled_sampling_start = -1 +seed = 777 +self_iou_loss_coef = 0.0 +set_cost_bbox = 0 +set_cost_caption = 0 +set_cost_cl = 0.0 +set_cost_class = 2 +set_cost_giou = 4 +set_cost_sim = 1.0 +share_caption_head = 1 +soft_attention = 1 +start_from = +start_from_mode = last +start_refine_epoch = -1 +statistic_mode = mode +text_encoder_learning_strategy = frozen +text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] +text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] +text_hidden_dim = 768 +top_frames = 30 +train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] +train_proposal_file = data/generated_proposals/dbg_trainval_top100.json +train_proposal_sample_num = 30 +train_proposal_type = gt +training_scheme = all +transformer_dropout_prob = 0.1 +transformer_ff_dim = 512 +transformer_input_type = queries +use_additional_cap_layer = False +use_additional_score_layer = False +use_anchor = 0 +use_neg_pseudo_box = False +use_pseudo_box = False +use_query_box_for_refine = 0 +val_caption_file = data/anet/captiondata/val_1.json +visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] +visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] +visual_feature_type = ['CLIP'] +vocab_size = 16221 +vocab_size_val = 16221 +weight_decay = 0.0001 +weighted_mil_loss = False +width_ratio = 1 +width_th = 1 +window_size = 2 +with_box_refine = 1 +wordRNN_input_feats_type = C + + +******************** Model structure: ****************************************** +PDVC( + (base_encoder): BaseEncoder( + (pos_embed): PositionEmbeddingSine( + (duration_embed_layer): Linear(in_features=256, out_features=256, bias=True) + ) + (input_proj): ModuleList( + (0): Sequential( + (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (1): Sequential( + (0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (2): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (3): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + ) + ) + (transformer): DeformableTransformer( + (encoder): DeformableTransformerEncoder( + (layers): ModuleList( + (0): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + 
(sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (decoder): DeformableTransformerDecoder( + (layers): ModuleList( + (0): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + (bbox_head): 
ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + ) + (pos_trans): Linear(in_features=512, out_features=1024, bias=True) + (pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (reference_points): Linear(in_features=512, out_features=1, bias=True) + ) + (caption_head): ModuleList( + (0): LSTMDSACaptioner( + (embed): Embedding(16222, 512) + (logit): Linear(in_features=512, out_features=16222, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + (1): LSTMDSACaptioner( + (embed): Embedding(16222, 512) + (logit): Linear(in_features=512, out_features=16222, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + ) + (query_embed): Embedding(100, 1024) + (class_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (class_refine_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (count_head): ModuleList( + (0): Linear(in_features=512, out_features=11, bias=True) + (1): Linear(in_features=512, out_features=11, bias=True) + ) + (bbox_head): ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + (contrastive_projection_event): ModuleList( + (0): Identity() + (1): Identity() + ) + (contrastive_projection_text): ModuleList( + (0): Identity() + (1): Identity() + ) +) + + +******************** Start training ! 
****************************************** +loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) +loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) +ID seq2-ft(mix)-gt_percent-1.0 iter 1000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.126), ('loss_bbox', 0.117), ('loss_giou', 0.275), ('loss_self_iou', 0.126), ('cardinality_error', 3.775), ('loss_ce_0', 0.284), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.118), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.126), ('cardinality_error_0', 3.775), ('loss_caption_0', 3.781), ('loss_caption', 3.778), ('total_loss', 18.585)]), +time/iter = 0.182, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.119), ('loss_bbox', 0.087), ('loss_giou', 0.239), ('loss_self_iou', 0.12), ('cardinality_error', 3.705), ('loss_ce_0', 0.289), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.087), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.121), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.682), ('loss_caption', 3.675), ('total_loss', 17.896)]), +time/iter = 0.180, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.227), ('loss_self_iou', 0.098), ('cardinality_error', 3.705), ('loss_ce_0', 0.292), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.668), ('loss_caption', 3.664), ('total_loss', 17.771)]), +time/iter = 0.181, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.126), ('loss_bbox', 0.078), ('loss_giou', 0.224), ('loss_self_iou', 0.1), ('cardinality_error', 3.784), ('loss_ce_0', 0.291), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.624), ('loss_caption', 3.629), ('total_loss', 17.579)]), +time/iter = 0.174, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.121), ('loss_bbox', 0.08), ('loss_giou', 0.218), ('loss_self_iou', 0.114), ('cardinality_error', 3.674), ('loss_ce_0', 0.287), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.08), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.674), ('loss_caption_0', 3.629), ('loss_caption', 3.629), ('total_loss', 17.526)]), +time/iter = 0.178, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.13), ('loss_bbox', 0.076), ('loss_giou', 0.22), ('loss_self_iou', 0.098), ('cardinality_error', 3.786), ('loss_ce_0', 0.293), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.786), ('loss_caption_0', 3.625), ('loss_caption', 3.622), ('total_loss', 17.555)]), +time/iter = 0.182, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.215), ('loss_self_iou', 0.097), ('cardinality_error', 3.746), ('loss_ce_0', 0.293), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), 
('loss_giou_0', 0.215), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.746), ('loss_caption_0', 3.58), ('loss_caption', 3.576), ('total_loss', 17.319)]), +time/iter = 0.179, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.129), ('loss_bbox', 0.078), ('loss_giou', 0.218), ('loss_self_iou', 0.108), ('cardinality_error', 3.754), ('loss_ce_0', 0.288), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.079), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.754), ('loss_caption_0', 3.546), ('loss_caption', 3.546), ('total_loss', 17.209)]), +time/iter = 0.184, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.078), ('loss_giou', 0.219), ('loss_self_iou', 0.1), ('cardinality_error', 3.685), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.219), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.685), ('loss_caption_0', 3.544), ('loss_caption', 3.54), ('total_loss', 17.2)]), +time/iter = 0.180, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.22), ('loss_self_iou', 0.101), ('cardinality_error', 3.748), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.748), ('loss_caption_0', 3.582), ('loss_caption', 3.577), ('total_loss', 17.376)]), +time/iter = 0.180, bad_vid = 0.000 + +Validation results of iter 10009: +Bleu_1:0.15656016917085527 +Bleu_2:0.08210369852679855 +Bleu_3:0.042491746140277446 +Bleu_4:0.021149866989626908 +METEOR:0.08752782819459405 +ROUGE_L:0.1577032846084498 +CIDEr:0.2687260839927409 +Recall:0.4986985069085389 +Precision:0.548450952477792 +soda_c:0.045070258467165024 +para_Bleu_1:0.36987086578065714 +para_Bleu_2:0.1987998709052068 +para_Bleu_3:0.11671522868501899 +para_Bleu_4:0.07164097958462183 +para_METEOR:0.13901753612789455 +para_ROUGE_L:0.2826680559963382 +para_CIDEr:0.0956891322121665 + +overall score of iter 10009: 0.3063476479246829 + +Save model at iter 10009 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 10009 to checkpoint file. 
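The log rewrites model-last.pth after every validation pass and additionally promotes the checkpoint to "best" when the tracked criterion improves (criteria_for_best_ckpt = overall in the args above), which is consistent with the model-best.pth and model-last.pth LFS pointers earlier in this diff sharing a single sha256 oid. A minimal Python sketch of that save-last/promote-best pattern; save_checkpoint is a hypothetical helper for illustration, not the repository's actual code:

import shutil
import torch

def save_checkpoint(model, optimizer, iteration, overall_score, best_score, save_dir):
    # Refresh model-last.pth after each validation pass (hypothetical helper).
    last_path = f"{save_dir}/model-last.pth"
    torch.save({"model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "iteration": iteration,
                "overall_score": overall_score}, last_path)
    # Promote to model-best.pth only when the overall score improves; a
    # byte-for-byte copy is how both .pth files can end up with one LFS oid.
    if best_score is None or overall_score > best_score:
        shutil.copyfile(last_path, f"{save_dir}/model-best.pth")
        best_score = overall_score
    return best_score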
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 11000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.124), ('loss_bbox', 0.077), ('loss_giou', 0.217), ('loss_self_iou', 0.101), ('cardinality_error', 3.788), ('loss_ce_0', 0.292), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.788), ('loss_caption_0', 3.446), ('loss_caption', 3.443), ('total_loss', 16.802)]), +time/iter = 0.707, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.694), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.694), ('loss_caption_0', 3.427), ('loss_caption', 3.428), ('total_loss', 16.701)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.217), ('loss_self_iou', 0.107), ('cardinality_error', 3.689), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.689), ('loss_caption_0', 3.464), ('loss_caption', 3.461), ('total_loss', 16.871)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.21), ('loss_self_iou', 0.1), ('cardinality_error', 3.663), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.663), ('loss_caption_0', 3.414), ('loss_caption', 3.41), ('total_loss', 16.616)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.127), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.828), ('loss_ce_0', 0.296), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.828), ('loss_caption_0', 3.453), ('loss_caption', 3.453), ('total_loss', 16.836)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.073), ('loss_giou', 0.206), ('loss_self_iou', 0.105), ('cardinality_error', 3.687), ('loss_ce_0', 0.297), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.461), ('loss_caption', 3.462), ('total_loss', 16.803)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.127), ('loss_bbox', 0.073), ('loss_giou', 0.208), ('loss_self_iou', 0.102), ('cardinality_error', 3.791), ('loss_ce_0', 0.3), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.791), ('loss_caption_0', 3.469), ('loss_caption', 3.465), ('total_loss', 16.864)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.298), 
('loss_counter', 0.119), ('loss_bbox', 0.074), ('loss_giou', 0.205), ('loss_self_iou', 0.107), ('cardinality_error', 3.68), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.68), ('loss_caption_0', 3.478), ('loss_caption', 3.475), ('total_loss', 16.859)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.073), ('loss_giou', 0.207), ('loss_self_iou', 0.099), ('cardinality_error', 3.752), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.396), ('loss_caption', 3.396), ('total_loss', 16.585)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.128), ('loss_bbox', 0.071), ('loss_giou', 0.208), ('loss_self_iou', 0.101), ('cardinality_error', 3.804), ('loss_ce_0', 0.304), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.804), ('loss_caption_0', 3.42), ('loss_caption', 3.419), ('total_loss', 16.684)]), +time/iter = 0.189, bad_vid = 0.000 + +Validation results of iter 20018: +Bleu_1:0.15965966113561106 +Bleu_2:0.08785069799970043 +Bleu_3:0.04739925348589703 +Bleu_4:0.02377096308421814 +METEOR:0.09062964515721111 +ROUGE_L:0.1652647774491388 +CIDEr:0.27366191469495676 +Recall:0.45131293652113946 +Precision:0.5379414954918249 +soda_c:0.04303682007432423 +para_Bleu_1:0.3640361416830845 +para_Bleu_2:0.1986476696673755 +para_Bleu_3:0.11814800235116821 +para_Bleu_4:0.07336184523852665 +para_METEOR:0.13911724177507803 +para_ROUGE_L:0.28211794880017504 +para_CIDEr:0.08634617454158834 + +overall score of iter 20018: 0.29882526155519307 + +Save model at iter 20018 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 21000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.202), ('loss_self_iou', 0.101), ('cardinality_error', 3.666), ('loss_ce_0', 0.299), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.666), ('loss_caption_0', 3.344), ('loss_caption', 3.335), ('total_loss', 16.294)]), +time/iter = 0.726, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.201), ('loss_self_iou', 0.109), ('cardinality_error', 3.752), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.302), ('loss_caption', 3.304), ('total_loss', 16.116)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.128), ('loss_bbox', 0.077), ('loss_giou', 0.208), ('loss_self_iou', 0.113), ('cardinality_error', 3.803), ('loss_ce_0', 0.299), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.112), ('cardinality_error_0', 3.803), ('loss_caption_0', 3.348), ('loss_caption', 3.34), ('total_loss', 16.363)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.122), ('loss_bbox', 0.076), ('loss_giou', 0.207), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.294), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 3.354), ('loss_caption', 3.351), ('total_loss', 16.364)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.213), ('loss_self_iou', 0.091), ('cardinality_error', 3.734), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.734), ('loss_caption_0', 3.372), ('loss_caption', 3.372), ('total_loss', 16.494)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.125), ('loss_bbox', 0.072), ('loss_giou', 0.203), ('loss_self_iou', 0.096), ('cardinality_error', 3.784), ('loss_ce_0', 0.299), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.334), ('loss_caption', 3.333), ('total_loss', 16.279)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 27000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.118), ('loss_bbox', 0.076), ('loss_giou', 0.203), ('loss_self_iou', 0.102), ('cardinality_error', 3.64), ('loss_ce_0', 0.291), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.64), ('loss_caption_0', 3.348), ('loss_caption', 3.345), ('total_loss', 16.287)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 28000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.292), 
('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.201), ('loss_self_iou', 0.095), ('cardinality_error', 3.774), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.774), ('loss_caption_0', 3.337), ('loss_caption', 3.333), ('total_loss', 16.249)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 29000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.12), ('loss_bbox', 0.075), ('loss_giou', 0.204), ('loss_self_iou', 0.1), ('cardinality_error', 3.755), ('loss_ce_0', 0.299), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.755), ('loss_caption_0', 3.315), ('loss_caption', 3.321), ('total_loss', 16.223)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 30000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.119), ('loss_bbox', 0.071), ('loss_giou', 0.195), ('loss_self_iou', 0.103), ('cardinality_error', 3.72), ('loss_ce_0', 0.302), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.72), ('loss_caption_0', 3.347), ('loss_caption', 3.349), ('total_loss', 16.283)]), +time/iter = 0.195, bad_vid = 0.000 + +Validation results of iter 30027: +Bleu_1:0.15440507165989542 +Bleu_2:0.08178273697953425 +Bleu_3:0.042600749568780155 +Bleu_4:0.02119123483046711 +METEOR:0.08563216148714695 +ROUGE_L:0.156809182143994 +CIDEr:0.25960752079137744 +Recall:0.5075951227720545 +Precision:0.571834112941489 +soda_c:0.048597974030683 +para_Bleu_1:0.3985431504573892 +para_Bleu_2:0.22415947108296613 +para_Bleu_3:0.1341003834690626 +para_Bleu_4:0.08312155143550452 +para_METEOR:0.1510085678983445 +para_ROUGE_L:0.2957598062989384 +para_CIDEr:0.12271570278513648 + +overall score of iter 30027: 0.3568458221189855 + +Save model at iter 30027 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 30027 to checkpoint file. 
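The per-iteration records ("ID ... iter N (epoch E), loss = OrderedDict([...]), time/iter = ..., bad_vid = ...") keep the same keys throughout the run, so the training curve can likewise be pulled straight out of the text. A minimal sketch under the same assumptions (hypothetical helper, exact record layout as above):

import re

# Hypothetical helper: extract (iteration, total_loss) pairs from records of
# the form "... iter N (epoch E), loss = OrderedDict([..., ('total_loss', X)])".
def parse_total_loss(log_text):
    record = re.compile(
        # the optional "+" tolerates the diff markers in the paste above
        r"iter (\d+) \(epoch \d+\),\s*\+?\s*loss = OrderedDict\(\["
        r".*?\('total_loss', ([\d.]+)\)\]\)",
        re.S,
    )
    return [(int(m.group(1)), float(m.group(2))) for m in record.finditer(log_text)]

The records above yield pairs such as (8000, 17.209) and (30000, 16.283), i.e. total_loss drifts downward across epochs. Note also that the first reading of each epoch reports time/iter around 0.7 s against the steady ~0.19 s, presumably because the validation and checkpointing pass at the epoch boundary falls inside that 1,000-iteration timing window.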
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 31000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.123), ('loss_bbox', 0.073), ('loss_giou', 0.202), ('loss_self_iou', 0.114), ('cardinality_error', 3.772), ('loss_ce_0', 0.296), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.772), ('loss_caption_0', 3.24), ('loss_caption', 3.242), ('total_loss', 15.889)]), +time/iter = 0.725, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 32000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.093), ('cardinality_error', 3.66), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.251), ('loss_caption', 3.248), ('total_loss', 15.869)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 33000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.787), ('loss_ce_0', 0.301), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.223), ('loss_caption', 3.225), ('total_loss', 15.81)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 34000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.121), ('loss_bbox', 0.076), ('loss_giou', 0.201), ('loss_self_iou', 0.107), ('cardinality_error', 3.719), ('loss_ce_0', 0.296), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.719), ('loss_caption_0', 3.21), ('loss_caption', 3.206), ('total_loss', 15.752)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 35000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.074), ('loss_giou', 0.201), ('loss_self_iou', 0.1), ('cardinality_error', 3.761), ('loss_ce_0', 0.304), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.761), ('loss_caption_0', 3.261), ('loss_caption', 3.267), ('total_loss', 16.006)]), +time/iter = 0.187, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 36000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.12), ('loss_bbox', 0.074), ('loss_giou', 0.202), ('loss_self_iou', 0.096), ('cardinality_error', 3.731), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.731), ('loss_caption_0', 3.322), ('loss_caption', 3.322), ('total_loss', 16.237)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 37000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.088), ('cardinality_error', 3.747), ('loss_ce_0', 0.306), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.276), ('loss_caption', 3.278), ('total_loss', 16.005)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 38000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 
0.122), ('loss_bbox', 0.073), ('loss_giou', 0.198), ('loss_self_iou', 0.096), ('cardinality_error', 3.747), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.26), ('loss_caption', 3.267), ('total_loss', 15.944)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 39000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.073), ('loss_giou', 0.194), ('loss_self_iou', 0.096), ('cardinality_error', 3.714), ('loss_ce_0', 0.3), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 3.29), ('loss_caption', 3.284), ('total_loss', 16.029)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 40000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.068), ('loss_giou', 0.187), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 3.255), ('loss_caption', 3.258), ('total_loss', 15.861)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 40036: +Bleu_1:0.16003947012491918 +Bleu_2:0.08640386650819816 +Bleu_3:0.045769192920880976 +Bleu_4:0.023139762266241797 +METEOR:0.08893476927946467 +ROUGE_L:0.16285119298911696 +CIDEr:0.27850058398714506 +Recall:0.4974410652224822 +Precision:0.571762083926507 +soda_c:0.04898353247531122 +para_Bleu_1:0.4116267700746525 +para_Bleu_2:0.23315066082372427 +para_Bleu_3:0.139785630195007 +para_Bleu_4:0.08689414164874545 +para_METEOR:0.15321412716959742 +para_ROUGE_L:0.2993749803089721 +para_CIDEr:0.12755194391496638 + +overall score of iter 40036: 0.3676602127333093 + +Save model at iter 40036 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 40036 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 41000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.196), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.159), ('loss_caption', 3.162), ('total_loss', 15.549)]), +time/iter = 0.733, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 42000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.698), ('loss_ce_0', 0.298), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.698), ('loss_caption_0', 3.191), ('loss_caption', 3.187), ('total_loss', 15.571)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 43000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.198), ('loss_self_iou', 0.089), ('cardinality_error', 3.785), ('loss_ce_0', 0.306), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.785), ('loss_caption_0', 3.247), ('loss_caption', 3.249), ('total_loss', 15.93)]), +time/iter = 0.195, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 44000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.072), ('loss_giou', 0.194), ('loss_self_iou', 0.104), ('cardinality_error', 3.727), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.727), ('loss_caption_0', 3.228), ('loss_caption', 3.227), ('total_loss', 15.794)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 45000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.194), ('loss_self_iou', 0.094), ('cardinality_error', 3.684), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.684), ('loss_caption_0', 3.138), ('loss_caption', 3.143), ('total_loss', 15.458)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 46000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.194), ('loss_self_iou', 0.107), ('cardinality_error', 3.8), ('loss_ce_0', 0.301), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.8), ('loss_caption_0', 3.198), ('loss_caption', 3.202), ('total_loss', 15.69)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 47000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.071), ('loss_giou', 0.193), ('loss_self_iou', 0.1), ('cardinality_error', 3.724), ('loss_ce_0', 0.302), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.166), ('loss_caption', 3.167), ('total_loss', 15.544)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 48000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 
0.126), ('loss_bbox', 0.074), ('loss_giou', 0.194), ('loss_self_iou', 0.1), ('cardinality_error', 3.779), ('loss_ce_0', 0.303), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.779), ('loss_caption_0', 3.197), ('loss_caption', 3.204), ('total_loss', 15.693)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 49000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.186), ('loss_self_iou', 0.103), ('cardinality_error', 3.67), ('loss_ce_0', 0.299), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.67), ('loss_caption_0', 3.197), ('loss_caption', 3.193), ('total_loss', 15.597)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 50000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.191), ('loss_self_iou', 0.1), ('cardinality_error', 3.769), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 3.195), ('loss_caption', 3.196), ('total_loss', 15.646)]), +time/iter = 0.193, bad_vid = 0.000 + +Validation results of iter 50045: +Bleu_1:0.1612752203314224 +Bleu_2:0.08712092952271142 +Bleu_3:0.04643407984417907 +Bleu_4:0.024237450149938583 +METEOR:0.0888552980469009 +ROUGE_L:0.16165678007821221 +CIDEr:0.28844655875134945 +Recall:0.5079771255793173 +Precision:0.5707494407158785 +soda_c:0.05143467092505771 +para_Bleu_1:0.425828341023263 +para_Bleu_2:0.2431293051387748 +para_Bleu_3:0.14662751878582 +para_Bleu_4:0.09131956416083617 +para_METEOR:0.15868276543147294 +para_ROUGE_L:0.30762031965083425 +para_CIDEr:0.1438790695271004 + +overall score of iter 50045: 0.39388139911940956 + +Save model at iter 50045 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 50045 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 51000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.119), ('loss_bbox', 0.072), ('loss_giou', 0.19), ('loss_self_iou', 0.1), ('cardinality_error', 3.708), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.708), ('loss_caption_0', 3.123), ('loss_caption', 3.122), ('total_loss', 15.345)]), +time/iter = 0.739, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 52000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.195), ('loss_self_iou', 0.091), ('cardinality_error', 3.787), ('loss_ce_0', 0.302), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.08), ('loss_caption', 3.08), ('total_loss', 15.224)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 53000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.192), ('loss_self_iou', 0.101), ('cardinality_error', 3.688), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.688), ('loss_caption_0', 3.121), ('loss_caption', 3.125), ('total_loss', 15.366)]), +time/iter = 0.196, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 54000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.66), ('loss_ce_0', 0.303), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.151), ('loss_caption', 3.158), ('total_loss', 15.44)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 55000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.759), ('loss_ce_0', 0.314), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.759), ('loss_caption_0', 3.137), ('loss_caption', 3.138), ('total_loss', 15.427)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 56000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.7), ('loss_ce_0', 0.303), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.7), ('loss_caption_0', 3.128), ('loss_caption', 3.132), ('total_loss', 15.353)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 57000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.125), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.833), ('loss_ce_0', 0.308), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.833), ('loss_caption_0', 3.157), ('loss_caption', 3.154), ('total_loss', 15.516)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 58000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), 
('loss_bbox', 0.072), ('loss_giou', 0.192), ('loss_self_iou', 0.099), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.092), ('loss_caption', 3.088), ('total_loss', 15.209)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 59000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.092), ('cardinality_error', 3.806), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.806), ('loss_caption_0', 3.204), ('loss_caption', 3.204), ('total_loss', 15.668)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 60000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.73), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.185), ('loss_caption', 3.179), ('total_loss', 15.62)]), +time/iter = 0.192, bad_vid = 0.000 + +Validation results of iter 60054: +Bleu_1:0.16203040821313286 +Bleu_2:0.087418866671477 +Bleu_3:0.04641401855891123 +Bleu_4:0.023872355329811287 +METEOR:0.08736154709181514 +ROUGE_L:0.16095171754962678 +CIDEr:0.3019460931650574 +Recall:0.5237442505746305 +Precision:0.5691986983933232 +soda_c:0.05366939846142926 +para_Bleu_1:0.4285515683378188 +para_Bleu_2:0.24896313523930838 +para_Bleu_3:0.15083849533584295 +para_Bleu_4:0.09425440122753082 +para_METEOR:0.15418242275887206 +para_ROUGE_L:0.3037081433191389 +para_CIDEr:0.16822639157343386 + +overall score of iter 60054: 0.41666321555983676 + +Save model at iter 60054 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 60054 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 61000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.687), ('loss_ce_0', 0.303), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.025), ('loss_caption', 3.031), ('total_loss', 14.914)]), +time/iter = 0.715, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 62000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.125), ('loss_bbox', 0.068), ('loss_giou', 0.192), ('loss_self_iou', 0.088), ('cardinality_error', 3.809), ('loss_ce_0', 0.304), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.809), ('loss_caption_0', 3.067), ('loss_caption', 3.064), ('total_loss', 15.147)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 63000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.072), ('loss_giou', 0.189), ('loss_self_iou', 0.102), ('cardinality_error', 3.636), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.636), ('loss_caption_0', 3.09), ('loss_caption', 3.083), ('total_loss', 15.188)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 64000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.185), ('loss_self_iou', 0.105), ('cardinality_error', 3.738), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.738), ('loss_caption_0', 3.09), ('loss_caption', 3.088), ('total_loss', 15.193)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 65000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.191), ('loss_self_iou', 0.094), ('cardinality_error', 3.735), ('loss_ce_0', 0.304), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.735), ('loss_caption_0', 3.087), ('loss_caption', 3.083), ('total_loss', 15.203)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 66000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.121), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.095), ('cardinality_error', 3.753), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.093), ('loss_caption', 3.093), ('total_loss', 15.235)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 67000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.189), ('loss_self_iou', 0.099), ('cardinality_error', 3.781), ('loss_ce_0', 0.299), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.781), ('loss_caption_0', 3.104), ('loss_caption', 3.095), ('total_loss', 15.24)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 68000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.3), 
('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.702), ('loss_ce_0', 0.3), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.702), ('loss_caption_0', 3.092), ('loss_caption', 3.087), ('total_loss', 15.171)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 69000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.116), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.087), ('cardinality_error', 3.705), ('loss_ce_0', 0.303), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.087), ('loss_caption', 3.084), ('total_loss', 15.154)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 70000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.119), ('loss_bbox', 0.07), ('loss_giou', 0.188), ('loss_self_iou', 0.104), ('cardinality_error', 3.763), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.763), ('loss_caption_0', 3.137), ('loss_caption', 3.142), ('total_loss', 15.421)]), +time/iter = 0.201, bad_vid = 0.000 + +Validation results of iter 70063: +Bleu_1:0.17095715677415013 +Bleu_2:0.0951967897773989 +Bleu_3:0.05145074727592996 +Bleu_4:0.026686223548170303 +METEOR:0.09033289555302068 +ROUGE_L:0.16939818741017104 +CIDEr:0.33299543538258497 +Recall:0.5001550726802355 +Precision:0.5629321740898863 +soda_c:0.05378783144134501 +para_Bleu_1:0.44719474980697405 +para_Bleu_2:0.2615784516531111 +para_Bleu_3:0.15956746990786394 +para_Bleu_4:0.09983770060804388 +para_METEOR:0.15549284849496958 +para_ROUGE_L:0.30852597622578265 +para_CIDEr:0.18758102150887232 + +overall score of iter 70063: 0.4429115706118858 + +Save model at iter 70063 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 70063 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 71000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.115), ('loss_bbox', 0.067), ('loss_giou', 0.187), ('loss_self_iou', 0.091), ('cardinality_error', 3.724), ('loss_ce_0', 0.304), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.994), ('loss_caption', 2.994), ('total_loss', 14.812)]), +time/iter = 0.691, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 72000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.099), ('cardinality_error', 3.665), ('loss_ce_0', 0.296), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.995), ('loss_caption', 3.0), ('total_loss', 14.803)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 73000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.067), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.762), ('loss_ce_0', 0.302), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.762), ('loss_caption_0', 3.03), ('loss_caption', 3.034), ('total_loss', 14.924)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 74000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.181), ('loss_self_iou', 0.093), ('cardinality_error', 3.722), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.722), ('loss_caption_0', 3.061), ('loss_caption', 3.062), ('total_loss', 15.037)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 75000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.835), ('loss_caption_0', 3.102), ('loss_caption', 3.108), ('total_loss', 15.261)]), +time/iter = 0.195, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 76000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.19), ('loss_self_iou', 0.096), ('cardinality_error', 3.787), ('loss_ce_0', 0.305), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.055), ('loss_caption', 3.056), ('total_loss', 15.081)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 77000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.191), ('loss_self_iou', 0.101), ('cardinality_error', 3.753), ('loss_ce_0', 0.3), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.064), ('loss_caption', 3.063), ('total_loss', 15.105)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 78000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.303), 
('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.812), ('loss_ce_0', 0.302), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.812), ('loss_caption_0', 3.075), ('loss_caption', 3.081), ('total_loss', 15.186)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 79000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.119), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.712), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.712), ('loss_caption_0', 3.004), ('loss_caption', 3.004), ('total_loss', 14.833)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 80000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.639), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.639), ('loss_caption_0', 3.011), ('loss_caption', 3.021), ('total_loss', 14.846)]), +time/iter = 0.189, bad_vid = 0.000 + +Validation results of iter 80072: +Bleu_1:0.16525493799366836 +Bleu_2:0.09017429361474327 +Bleu_3:0.04843073565357156 +Bleu_4:0.025752141227780294 +METEOR:0.09042668571725655 +ROUGE_L:0.1657835735936403 +CIDEr:0.30766696683798356 +Recall:0.5070758476264831 +Precision:0.5698723815334497 +soda_c:0.05193286444599829 +para_Bleu_1:0.4299765573510605 +para_Bleu_2:0.24998607326423264 +para_Bleu_3:0.15168978606887273 +para_Bleu_4:0.09540463753102806 +para_METEOR:0.15913054274631774 +para_ROUGE_L:0.30821511076520103 +para_CIDEr:0.14655297481419807 + +overall score of iter 80072: 0.4010881550915439 + +Save model at iter 80072 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 81000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.098), ('cardinality_error', 3.664), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.972), ('loss_caption', 2.974), ('total_loss', 14.63)]), +time/iter = 0.723, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 82000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.098), ('cardinality_error', 3.692), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.914), ('loss_caption', 2.912), ('total_loss', 14.413)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 83000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.939), ('loss_caption', 2.933), ('total_loss', 14.562)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 84000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.119), ('loss_bbox', 0.066), ('loss_giou', 0.18), ('loss_self_iou', 0.086), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.086), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.964), ('loss_caption', 2.963), ('total_loss', 14.614)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 85000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.187), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.301), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 2.942), ('loss_caption', 2.945), ('total_loss', 14.596)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 86000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.067), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.989), ('loss_caption', 2.988), ('total_loss', 14.745)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 87000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.119), ('loss_bbox', 0.067), ('loss_giou', 0.178), ('loss_self_iou', 0.096), ('cardinality_error', 3.692), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.93), ('loss_caption', 2.931), ('total_loss', 14.465)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 88000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.299), 
('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.102), ('cardinality_error', 3.74), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.945), ('loss_caption', 2.939), ('total_loss', 14.538)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 89000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.096), ('cardinality_error', 3.911), ('loss_ce_0', 0.303), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.911), ('loss_caption_0', 2.981), ('loss_caption', 2.985), ('total_loss', 14.762)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 90000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.113), ('loss_bbox', 0.066), ('loss_giou', 0.174), ('loss_self_iou', 0.099), ('cardinality_error', 3.667), ('loss_ce_0', 0.3), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.946), ('loss_caption', 2.945), ('total_loss', 14.493)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 90081: +Bleu_1:0.1659435247550983 +Bleu_2:0.09010888064116455 +Bleu_3:0.04740925434645997 +Bleu_4:0.023810200153797586 +METEOR:0.0893691583245007 +ROUGE_L:0.16481267120708817 +CIDEr:0.3096929324572276 +Recall:0.5271698247293078 +Precision:0.5766981899532185 +soda_c:0.05637593299631936 +para_Bleu_1:0.4507795558374508 +para_Bleu_2:0.2668765313566654 +para_Bleu_3:0.16324000259413463 +para_Bleu_4:0.10292908422008885 +para_METEOR:0.163503434468027 +para_ROUGE_L:0.3141109355407807 +para_CIDEr:0.1830754815850521 + +overall score of iter 90081: 0.44950800027316795 + +Save model at iter 90081 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 90081 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 91000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.066), ('loss_giou', 0.179), ('loss_self_iou', 0.097), ('cardinality_error', 3.807), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.807), ('loss_caption_0', 2.916), ('loss_caption', 2.914), ('total_loss', 14.411)]), +time/iter = 0.724, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 92000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.121), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.093), ('cardinality_error', 3.784), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.784), ('loss_caption_0', 2.916), ('loss_caption', 2.915), ('total_loss', 14.422)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 93000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.117), ('loss_bbox', 0.065), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.806), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.806), ('loss_caption_0', 2.9), ('loss_caption', 2.905), ('total_loss', 14.377)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 94000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.109), ('loss_bbox', 0.068), ('loss_giou', 0.174), ('loss_self_iou', 0.105), ('cardinality_error', 3.616), ('loss_ce_0', 0.293), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.106), ('cardinality_error_0', 3.616), ('loss_caption_0', 2.912), ('loss_caption', 2.914), ('total_loss', 14.339)]), +time/iter = 0.187, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 95000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.12), ('loss_bbox', 0.066), ('loss_giou', 0.185), ('loss_self_iou', 0.093), ('cardinality_error', 3.805), ('loss_ce_0', 0.296), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.805), ('loss_caption_0', 2.938), ('loss_caption', 2.941), ('total_loss', 14.546)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 96000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.177), ('loss_self_iou', 0.103), ('cardinality_error', 3.684), ('loss_ce_0', 0.293), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.684), ('loss_caption_0', 2.928), ('loss_caption', 2.931), ('total_loss', 14.434)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 97000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.111), ('loss_bbox', 0.066), ('loss_giou', 0.184), ('loss_self_iou', 0.095), ('cardinality_error', 3.693), ('loss_ce_0', 0.298), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.693), ('loss_caption_0', 2.902), ('loss_caption', 2.903), ('total_loss', 14.392)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 98000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.296), 
('loss_counter', 0.115), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.089), ('cardinality_error', 3.738), ('loss_ce_0', 0.298), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.738), ('loss_caption_0', 2.896), ('loss_caption', 2.902), ('total_loss', 14.361)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 99000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.115), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.095), ('cardinality_error', 3.702), ('loss_ce_0', 0.296), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.702), ('loss_caption_0', 2.956), ('loss_caption', 2.956), ('total_loss', 14.525)]), +time/iter = 0.195, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 100000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.092), ('cardinality_error', 3.751), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.751), ('loss_caption_0', 2.932), ('loss_caption', 2.932), ('total_loss', 14.453)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 100090: +Bleu_1:0.16664911544364056 +Bleu_2:0.09023295213839283 +Bleu_3:0.04763940550902772 +Bleu_4:0.02409205514859969 +METEOR:0.0878588871148787 +ROUGE_L:0.16401896184386325 +CIDEr:0.31947446694949533 +Recall:0.5282742157284517 +Precision:0.5750796556165633 +soda_c:0.05745241491068406 +para_Bleu_1:0.46204429574393835 +para_Bleu_2:0.2749900961045832 +para_Bleu_3:0.1683879565471281 +para_Bleu_4:0.10624339593597942 +para_METEOR:0.16245439213508253 +para_ROUGE_L:0.3162965936511474 +para_CIDEr:0.20803178964320856 + +overall score of iter 100090: 0.4767295777142705 + +Save model at iter 100090 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 100090 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 101000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.849), ('loss_caption', 2.847), ('total_loss', 14.064)]), +time/iter = 0.713, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 102000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.116), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.093), ('cardinality_error', 3.695), ('loss_ce_0', 0.293), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.85), ('loss_caption', 2.848), ('total_loss', 14.087)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 103000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.115), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.724), ('loss_ce_0', 0.293), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.846), ('loss_caption', 2.854), ('total_loss', 14.092)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 104000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.097), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.916), ('loss_caption', 2.913), ('total_loss', 14.362)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 105000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.907), ('loss_caption', 2.902), ('total_loss', 14.342)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 106000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.11), ('cardinality_error', 3.775), ('loss_ce_0', 0.293), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.876), ('loss_caption', 2.875), ('total_loss', 14.264)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 107000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.178), ('loss_self_iou', 0.099), ('cardinality_error', 3.743), ('loss_ce_0', 0.291), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.91), ('loss_caption', 2.909), ('total_loss', 14.358)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 108000 (epoch 10), +loss = OrderedDict([('loss_ce', 
0.295), ('loss_counter', 0.118), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.1), ('cardinality_error', 3.81), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.81), ('loss_caption_0', 2.928), ('loss_caption', 2.93), ('total_loss', 14.446)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 109000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.118), ('loss_bbox', 0.063), ('loss_giou', 0.178), ('loss_self_iou', 0.091), ('cardinality_error', 3.78), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.78), ('loss_caption_0', 2.916), ('loss_caption', 2.912), ('total_loss', 14.396)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 110000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.087), ('cardinality_error', 3.72), ('loss_ce_0', 0.297), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.948), ('loss_caption', 2.948), ('total_loss', 14.539)]), +time/iter = 0.196, bad_vid = 0.000 + +Validation results of iter 110099: +Bleu_1:0.1671778590456048 +Bleu_2:0.09077014613023152 +Bleu_3:0.0476684747303012 +Bleu_4:0.02445564298599047 +METEOR:0.08933235383587503 +ROUGE_L:0.1654660162888944 +CIDEr:0.31886265111118334 +Recall:0.5314017615268335 +Precision:0.5831469052945512 +soda_c:0.05853263249839839 +para_Bleu_1:0.46544090189732323 +para_Bleu_2:0.2789325258737778 +para_Bleu_3:0.17172911957785325 +para_Bleu_4:0.10903514181091935 +para_METEOR:0.16550159188298816 +para_ROUGE_L:0.3181118223429575 +para_CIDEr:0.2056618808195008 + +overall score of iter 110099: 0.4801986145134083 + +Save model at iter 110099 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 110099 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 111000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.095), ('cardinality_error', 3.718), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.718), ('loss_caption_0', 2.867), ('loss_caption', 2.869), ('total_loss', 14.14)]), +time/iter = 0.727, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 112000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.725), ('loss_ce_0', 0.289), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.844), ('loss_caption', 2.842), ('total_loss', 14.015)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 113000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.734), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.734), ('loss_caption_0', 2.837), ('loss_caption', 2.834), ('total_loss', 13.981)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 114000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.739), ('loss_ce_0', 0.285), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.739), ('loss_caption_0', 2.855), ('loss_caption', 2.857), ('total_loss', 14.084)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 115000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.175), ('loss_self_iou', 0.092), ('cardinality_error', 3.74), ('loss_ce_0', 0.284), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.823), ('loss_caption', 2.824), ('total_loss', 13.959)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 116000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.177), ('loss_self_iou', 0.088), ('cardinality_error', 3.753), ('loss_ce_0', 0.288), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.846), ('loss_caption', 2.843), ('total_loss', 14.073)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 117000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.755), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.804), ('loss_caption', 2.81), ('total_loss', 13.896)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 118000 (epoch 11), +loss = 
OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.093), ('cardinality_error', 3.715), ('loss_ce_0', 0.285), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.715), ('loss_caption_0', 2.863), ('loss_caption', 2.866), ('total_loss', 14.129)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 119000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.064), ('loss_giou', 0.176), ('loss_self_iou', 0.098), ('cardinality_error', 3.735), ('loss_ce_0', 0.287), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.735), ('loss_caption_0', 2.844), ('loss_caption', 2.843), ('total_loss', 14.061)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 120000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.175), ('loss_self_iou', 0.101), ('cardinality_error', 3.755), ('loss_ce_0', 0.285), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.868), ('loss_caption', 2.878), ('total_loss', 14.168)]), +time/iter = 0.190, bad_vid = 0.000 + +Validation results of iter 120108: +Bleu_1:0.16560019346009094 +Bleu_2:0.08934946581658681 +Bleu_3:0.04692472826903507 +Bleu_4:0.023331060597699706 +METEOR:0.08861943572471001 +ROUGE_L:0.16392659155605854 +CIDEr:0.31177527957257306 +Recall:0.5248955646301546 +Precision:0.5713061826316813 +soda_c:0.056694173808073595 +para_Bleu_1:0.45551540477127933 +para_Bleu_2:0.2725270289009415 +para_Bleu_3:0.16731081427102573 +para_Bleu_4:0.10555679460767188 +para_METEOR:0.1665724805603667 +para_ROUGE_L:0.31619749898051375 +para_CIDEr:0.19719071969736374 + +overall score of iter 120108: 0.4693199948654023 + +Save model at iter 120108 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 121000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.095), ('cardinality_error', 3.691), ('loss_ce_0', 0.284), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.809), ('loss_caption', 2.808), ('total_loss', 13.835)]), +time/iter = 0.727, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 122000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.17), ('loss_self_iou', 0.093), ('cardinality_error', 3.706), ('loss_ce_0', 0.281), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.706), ('loss_caption_0', 2.811), ('loss_caption', 2.814), ('total_loss', 13.867)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 123000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.691), ('loss_ce_0', 0.281), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.789), ('loss_caption', 2.797), ('total_loss', 13.808)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 124000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.063), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.76), ('loss_ce_0', 0.281), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.76), ('loss_caption_0', 2.839), ('loss_caption', 2.842), ('total_loss', 13.984)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 125000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.763), ('loss_ce_0', 0.282), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.763), ('loss_caption_0', 2.81), ('loss_caption', 2.815), ('total_loss', 13.898)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 126000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.095), ('cardinality_error', 3.717), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.717), ('loss_caption_0', 2.789), ('loss_caption', 2.787), ('total_loss', 13.835)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 127000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.867), ('loss_caption', 2.871), ('total_loss', 14.097)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 128000 (epoch 12), +loss = 
OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.113), ('loss_bbox', 0.063), ('loss_giou', 0.173), ('loss_self_iou', 0.092), ('cardinality_error', 3.793), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.793), ('loss_caption_0', 2.868), ('loss_caption', 2.863), ('total_loss', 14.111)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 129000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.1), ('cardinality_error', 3.686), ('loss_ce_0', 0.283), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.686), ('loss_caption_0', 2.812), ('loss_caption', 2.813), ('total_loss', 13.903)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 130000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.772), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.772), ('loss_caption_0', 2.86), ('loss_caption', 2.861), ('total_loss', 14.105)]), +time/iter = 0.190, bad_vid = 0.000 + +Validation results of iter 130117: +Bleu_1:0.16778675341331784 +Bleu_2:0.09082555766488616 +Bleu_3:0.047445681271689716 +Bleu_4:0.02375280793420285 +METEOR:0.08883520478698428 +ROUGE_L:0.16531435721130755 +CIDEr:0.31778343902267087 +Recall:0.5273619026669621 +Precision:0.5698181479221706 +soda_c:0.05753856798988932 +para_Bleu_1:0.4610381779339771 +para_Bleu_2:0.2761144617772928 +para_Bleu_3:0.16915034097081671 +para_Bleu_4:0.10654029953240575 +para_METEOR:0.16638305166981465 +para_ROUGE_L:0.31710573495570465 +para_CIDEr:0.19601570682645908 + +overall score of iter 130117: 0.46893905802867947 + +Save model at iter 130117 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
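The per-iteration lines serialize the loss breakdown as a Python `OrderedDict` repr, so the tuple list inside it round-trips through `ast.literal_eval`. A hedged sketch for recovering the loss curves from such a log (the regex mirrors the line format in this dump and is an assumption, not project code):

```python
import ast
import re

# "iter 123000 (epoch 12), loss = OrderedDict([('loss_ce', 0.28), ...])"
LOSS_RE = re.compile(r"iter (\d+) \(epoch \d+\).*?OrderedDict\((\[.*?\])\)", re.S)

def iter_losses(log_text):
    """Yield (iteration, {loss_name: value}) for every logged training step."""
    for m in LOSS_RE.finditer(log_text):
        yield int(m.group(1)), dict(ast.literal_eval(m.group(2)))

# e.g. the total-loss trend across the run:
# pairs = [(it, d["total_loss"]) for it, d in iter_losses(text)]
```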
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 131000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.062), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.75), ('loss_ce_0', 0.279), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.75), ('loss_caption_0', 2.817), ('loss_caption', 2.826), ('total_loss', 13.897)]), +time/iter = 0.734, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 132000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.109), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.089), ('cardinality_error', 3.814), ('loss_ce_0', 0.274), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.814), ('loss_caption_0', 2.778), ('loss_caption', 2.776), ('total_loss', 13.726)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 133000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.095), ('cardinality_error', 3.773), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.773), ('loss_caption_0', 2.843), ('loss_caption', 2.843), ('total_loss', 13.999)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 134000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.108), ('loss_bbox', 0.065), ('loss_giou', 0.171), ('loss_self_iou', 0.101), ('cardinality_error', 3.743), ('loss_ce_0', 0.276), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.786), ('loss_caption', 2.787), ('total_loss', 13.756)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 135000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.794), ('loss_ce_0', 0.281), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.785), ('loss_caption', 2.784), ('total_loss', 13.759)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 136000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.065), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.653), ('loss_ce_0', 0.279), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.828), ('loss_caption', 2.834), ('total_loss', 13.919)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 137000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.105), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.099), ('cardinality_error', 3.654), ('loss_ce_0', 0.281), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.654), ('loss_caption_0', 2.79), ('loss_caption', 2.799), ('total_loss', 13.806)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 138000 (epoch 13), +loss = 
OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.171), ('loss_self_iou', 0.095), ('cardinality_error', 3.714), ('loss_ce_0', 0.28), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.835), ('loss_caption', 2.828), ('total_loss', 13.945)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 139000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.098), ('cardinality_error', 3.813), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.83), ('loss_caption', 2.828), ('total_loss', 13.924)]), +time/iter = 0.186, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 140000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.09), ('cardinality_error', 3.664), ('loss_ce_0', 0.28), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.821), ('loss_caption', 2.823), ('total_loss', 13.905)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 140126: +Bleu_1:0.16683698969676453 +Bleu_2:0.09036855967772307 +Bleu_3:0.047484441130632896 +Bleu_4:0.023876859658376735 +METEOR:0.08814626862844692 +ROUGE_L:0.16473003568483396 +CIDEr:0.3189568758512915 +Recall:0.5281546209817979 +Precision:0.5704333604501349 +soda_c:0.057417105431783064 +para_Bleu_1:0.4580706340663244 +para_Bleu_2:0.27372623489326064 +para_Bleu_3:0.16745128920972313 +para_Bleu_4:0.10550306643408856 +para_METEOR:0.16656454278617736 +para_ROUGE_L:0.31631873012989425 +para_CIDEr:0.19724321819057877 + +overall score of iter 140126: 0.46931082741084473 + +Save model at iter 140126 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 141000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.171), ('loss_self_iou', 0.106), ('cardinality_error', 3.774), ('loss_ce_0', 0.27), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.75), ('loss_caption', 2.748), ('total_loss', 13.572)]), +time/iter = 0.739, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 142000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.173), ('loss_self_iou', 0.091), ('cardinality_error', 3.797), ('loss_ce_0', 0.272), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.797), ('loss_caption_0', 2.72), ('loss_caption', 2.722), ('total_loss', 13.492)]), +time/iter = 0.186, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 143000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.162), ('loss_self_iou', 0.095), ('cardinality_error', 3.637), ('loss_ce_0', 0.268), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.637), ('loss_caption_0', 2.782), ('loss_caption', 2.782), ('total_loss', 13.626)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 144000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.112), ('loss_bbox', 0.062), ('loss_giou', 0.172), ('loss_self_iou', 0.094), ('cardinality_error', 3.831), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.831), ('loss_caption_0', 2.793), ('loss_caption', 2.79), ('total_loss', 13.773)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 145000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.101), ('loss_bbox', 0.061), ('loss_giou', 0.16), ('loss_self_iou', 0.093), ('cardinality_error', 3.665), ('loss_ce_0', 0.273), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.168), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.762), ('loss_caption', 2.767), ('total_loss', 13.554)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 146000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.725), ('loss_ce_0', 0.276), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.813), ('loss_caption', 2.813), ('total_loss', 13.811)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 147000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.097), ('cardinality_error', 3.714), ('loss_ce_0', 0.273), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.747), ('loss_caption', 2.745), ('total_loss', 13.578)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 148000 (epoch 14), +loss = OrderedDict([('loss_ce', 
0.271), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.728), ('loss_ce_0', 0.274), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.728), ('loss_caption_0', 2.843), ('loss_caption', 2.84), ('total_loss', 13.944)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 149000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.799), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.836), ('loss_caption', 2.836), ('total_loss', 13.926)]), +time/iter = 0.196, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 150000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.087), ('cardinality_error', 3.703), ('loss_ce_0', 0.272), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.703), ('loss_caption_0', 2.806), ('loss_caption', 2.806), ('total_loss', 13.795)]), +time/iter = 0.193, bad_vid = 0.000 + +Validation results of iter 150135: +Bleu_1:0.16662144072598145 +Bleu_2:0.08988753231411394 +Bleu_3:0.04690847145308288 +Bleu_4:0.023224274927987735 +METEOR:0.08725158341768323 +ROUGE_L:0.16364893754496343 +CIDEr:0.32028824475030926 +Recall:0.5260420675803493 +Precision:0.5630584367161506 +soda_c:0.057565785652999135 +para_Bleu_1:0.46764194087144684 +para_Bleu_2:0.2801629240374498 +para_Bleu_3:0.1713033186995987 +para_Bleu_4:0.10750827268624512 +para_METEOR:0.16742715934059368 +para_ROUGE_L:0.31858424377772926 +para_CIDEr:0.2089956210595351 + +overall score of iter 150135: 0.4839310530863739 + +Save model at iter 150135 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 150135 to checkpoint file. 
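Note the save pattern visible here: `model-last.pth` is overwritten at every evaluation, while a "Save Best-model" line only appears when the overall score beats the running maximum (0.4839 at iter 150135 surpasses the 0.4802 from iter 110099; the three evaluations in between did not). A sketch of that bookkeeping, with the best-checkpoint filename assumed since the log never names it:

```python
import shutil
from pathlib import Path

def save_checkpoints(save_dir, score, best_score):
    """Overwrite the rolling checkpoint; promote it only on a new best score."""
    last = Path(save_dir) / "model-last.pth"
    # torch.save(model.state_dict(), last) would happen here in the real loop.
    if score > best_score:
        shutil.copyfile(last, Path(save_dir) / "model-best.pth")  # assumed name
        return score
    return best_score
```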
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 151000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.101), ('loss_bbox', 0.063), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.645), ('loss_ce_0', 0.266), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.645), ('loss_caption_0', 2.762), ('loss_caption', 2.759), ('total_loss', 13.537)]), +time/iter = 0.737, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 152000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.087), ('cardinality_error', 3.722), ('loss_ce_0', 0.269), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.762), ('loss_caption', 2.766), ('total_loss', 13.59)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 153000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.111), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.083), ('cardinality_error', 3.813), ('loss_ce_0', 0.267), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.085), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.777), ('loss_caption', 2.778), ('total_loss', 13.663)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 154000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.106), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.769), ('loss_ce_0', 0.272), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.787), ('loss_caption', 2.787), ('total_loss', 13.717)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 155000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.09), ('cardinality_error', 3.714), ('loss_ce_0', 0.267), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.758), ('loss_caption', 2.76), ('total_loss', 13.593)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 156000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.167), ('loss_self_iou', 0.102), ('cardinality_error', 3.675), ('loss_ce_0', 0.269), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.675), ('loss_caption_0', 2.741), ('loss_caption', 2.742), ('total_loss', 13.504)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 157000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.267), ('loss_counter', 0.104), ('loss_bbox', 0.065), ('loss_giou', 0.167), ('loss_self_iou', 0.103), ('cardinality_error', 3.722), ('loss_ce_0', 0.268), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.777), ('loss_caption', 2.783), ('total_loss', 13.668)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 158000 (epoch 15), +loss = 
OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.099), ('cardinality_error', 3.758), ('loss_ce_0', 0.27), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.758), ('loss_caption_0', 2.815), ('loss_caption', 2.817), ('total_loss', 13.789)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 159000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.729), ('loss_ce_0', 0.275), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.783), ('loss_caption', 2.785), ('total_loss', 13.721)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 160000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.109), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.098), ('cardinality_error', 3.816), ('loss_ce_0', 0.271), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.78), ('loss_caption', 2.784), ('total_loss', 13.686)]), +time/iter = 0.196, bad_vid = 0.000 + +Validation results of iter 160144: +Bleu_1:0.16754398447821903 +Bleu_2:0.08978801866243748 +Bleu_3:0.046077601805781236 +Bleu_4:0.02215727819941335 +METEOR:0.08650894641812401 +ROUGE_L:0.16425299709373153 +CIDEr:0.3192637628790779 +Recall:0.5308598805776927 +Precision:0.5705477594739302 +soda_c:0.059035206979637336 +para_Bleu_1:0.4722129873397206 +para_Bleu_2:0.2843271953295457 +para_Bleu_3:0.17433620623201318 +para_Bleu_4:0.10943737200004257 +para_METEOR:0.16524483023272712 +para_ROUGE_L:0.3180351825656492 +para_CIDEr:0.2139382514781602 + +overall score of iter 160144: 0.4886204537109299 + +Save model at iter 160144 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 160144 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 161000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.103), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.695), ('loss_ce_0', 0.263), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.766), ('loss_caption', 2.768), ('total_loss', 13.553)]), +time/iter = 0.749, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 162000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.694), ('loss_ce_0', 0.266), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.768), ('loss_caption', 2.764), ('total_loss', 13.573)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 163000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.173), ('loss_self_iou', 0.097), ('cardinality_error', 3.769), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.765), ('loss_caption', 2.766), ('total_loss', 13.63)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 164000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.11), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.774), ('loss_ce_0', 0.269), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.772), ('loss_caption', 2.776), ('total_loss', 13.625)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 165000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.102), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.699), ('loss_ce_0', 0.267), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.711), ('loss_caption', 2.716), ('total_loss', 13.368)]), +time/iter = 0.187, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 166000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.105), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.72), ('loss_ce_0', 0.268), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.754), ('loss_caption', 2.755), ('total_loss', 13.534)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 167000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.095), ('cardinality_error', 3.712), ('loss_ce_0', 0.266), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.712), ('loss_caption_0', 2.771), ('loss_caption', 2.772), ('total_loss', 13.617)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 168000 (epoch 16), +loss = 
OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.09), ('cardinality_error', 3.816), ('loss_ce_0', 0.269), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.814), ('loss_caption', 2.82), ('total_loss', 13.826)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 169000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.106), ('cardinality_error', 3.697), ('loss_ce_0', 0.261), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.697), ('loss_caption_0', 2.769), ('loss_caption', 2.775), ('total_loss', 13.598)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 170000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.105), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.093), ('cardinality_error', 3.799), ('loss_ce_0', 0.272), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.794), ('loss_caption', 2.798), ('total_loss', 13.727)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 170153: +Bleu_1:0.16584280243722227 +Bleu_2:0.08889969905794425 +Bleu_3:0.04569298286173284 +Bleu_4:0.021992960199339176 +METEOR:0.08570833880397384 +ROUGE_L:0.16234979503724006 +CIDEr:0.3170462149966731 +Recall:0.5273397281824633 +Precision:0.5648989898989865 +soda_c:0.058539462474976364 +para_Bleu_1:0.4735378044184376 +para_Bleu_2:0.2855599966961999 +para_Bleu_3:0.17485842077678387 +para_Bleu_4:0.10998333079246524 +para_METEOR:0.16580782598840993 +para_ROUGE_L:0.3184105968751349 +para_CIDEr:0.2144083270960459 + +overall score of iter 170153: 0.4901994838769211 + +Save model at iter 170153 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 170153 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 171000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.161), ('loss_self_iou', 0.094), ('cardinality_error', 3.694), ('loss_ce_0', 0.261), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.772), ('loss_caption', 2.77), ('total_loss', 13.544)]), +time/iter = 0.745, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 172000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.096), ('cardinality_error', 3.667), ('loss_ce_0', 0.262), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.741), ('loss_caption', 2.743), ('total_loss', 13.47)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 173000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.104), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.09), ('cardinality_error', 3.753), ('loss_ce_0', 0.261), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.786), ('loss_caption', 2.785), ('total_loss', 13.646)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 174000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.107), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.094), ('cardinality_error', 3.832), ('loss_ce_0', 0.261), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.832), ('loss_caption_0', 2.733), ('loss_caption', 2.738), ('total_loss', 13.457)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 175000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.163), ('loss_self_iou', 0.098), ('cardinality_error', 3.731), ('loss_ce_0', 0.259), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.062), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.731), ('loss_caption_0', 2.745), ('loss_caption', 2.744), ('total_loss', 13.454)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 176000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.095), ('cardinality_error', 3.795), ('loss_ce_0', 0.264), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.795), ('loss_caption_0', 2.761), ('loss_caption', 2.77), ('total_loss', 13.575)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 177000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.161), ('loss_self_iou', 0.096), ('cardinality_error', 3.652), ('loss_ce_0', 0.261), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.745), ('total_loss', 13.43)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 178000 (epoch 17), +loss = OrderedDict([('loss_ce', 
0.255), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.103), ('cardinality_error', 3.664), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.682), ('loss_caption', 2.68), ('total_loss', 13.211)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 179000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.105), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.09), ('cardinality_error', 3.825), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.825), ('loss_caption_0', 2.788), ('loss_caption', 2.796), ('total_loss', 13.671)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 180000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.781), ('loss_caption', 2.775), ('total_loss', 13.608)]), +time/iter = 0.192, bad_vid = 0.000 + +Validation results of iter 180162: +Bleu_1:0.16720622564646215 +Bleu_2:0.08946643461131876 +Bleu_3:0.04568137095423273 +Bleu_4:0.022039722503534608 +METEOR:0.08588931176535387 +ROUGE_L:0.16315869782389542 +CIDEr:0.32099741016990446 +Recall:0.5265047853249455 +Precision:0.5647345942647923 +soda_c:0.05847424883094643 +para_Bleu_1:0.47508155945278135 +para_Bleu_2:0.2858233856765029 +para_Bleu_3:0.17499503512152859 +para_Bleu_4:0.11002968407978216 +para_METEOR:0.16541373751181562 +para_ROUGE_L:0.3190110890037882 +para_CIDEr:0.21421557986951392 + +overall score of iter 180162: 0.4896590014611117 + +Save model at iter 180162 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 181000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.781), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.781), ('loss_caption_0', 2.743), ('loss_caption', 2.746), ('total_loss', 13.452)]), +time/iter = 0.750, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 182000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.1), ('cardinality_error', 3.726), ('loss_ce_0', 0.26), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.748), ('loss_caption', 2.746), ('total_loss', 13.472)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 183000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.722), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.729), ('loss_caption', 2.734), ('total_loss', 13.405)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 184000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.726), ('loss_ce_0', 0.257), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.783), ('loss_caption', 2.787), ('total_loss', 13.591)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 185000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.098), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.087), ('cardinality_error', 3.667), ('loss_ce_0', 0.26), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.718), ('loss_caption', 2.716), ('total_loss', 13.354)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 186000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.776), ('loss_ce_0', 0.259), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.776), ('loss_caption_0', 2.75), ('loss_caption', 2.75), ('total_loss', 13.494)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 187000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.803), ('loss_ce_0', 0.264), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.803), ('loss_caption_0', 2.788), ('loss_caption', 2.791), ('total_loss', 13.678)]), +time/iter = 0.198, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 188000 (epoch 18), +loss = OrderedDict([('loss_ce', 
0.253), ('loss_counter', 0.1), ('loss_bbox', 0.062), ('loss_giou', 0.163), ('loss_self_iou', 0.091), ('cardinality_error', 3.71), ('loss_ce_0', 0.259), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.71), ('loss_caption_0', 2.745), ('loss_caption', 2.743), ('total_loss', 13.444)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 189000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.165), ('loss_self_iou', 0.1), ('cardinality_error', 3.748), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.748), ('loss_caption_0', 2.751), ('loss_caption', 2.753), ('total_loss', 13.484)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 190000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.104), ('loss_bbox', 0.06), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 2.729), ('loss_caption', 2.73), ('total_loss', 13.395)]), +time/iter = 0.189, bad_vid = 0.000 + +Validation results of iter 190171: +Bleu_1:0.1662475028889873 +Bleu_2:0.08895418147726737 +Bleu_3:0.04559170272578064 +Bleu_4:0.021869443641790748 +METEOR:0.0853620749347768 +ROUGE_L:0.16226693807975517 +CIDEr:0.3203697867996399 +Recall:0.5243080966273422 +Precision:0.5592002237136435 +soda_c:0.058066485957305666 +para_Bleu_1:0.47302383939773723 +para_Bleu_2:0.2848420020452884 +para_Bleu_3:0.17477626094199183 +para_Bleu_4:0.11005159892431456 +para_METEOR:0.16474042555391544 +para_ROUGE_L:0.31754161420686944 +para_CIDEr:0.2082818020277855 + +overall score of iter 190171: 0.4830738265060155 + +Save model at iter 190171 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 191000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.086), ('cardinality_error', 3.653), ('loss_ce_0', 0.257), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.754), ('loss_caption', 2.752), ('total_loss', 13.501)]), +time/iter = 0.755, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 192000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.1), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.094), ('cardinality_error', 3.767), ('loss_ce_0', 0.258), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.767), ('loss_caption_0', 2.717), ('loss_caption', 2.72), ('total_loss', 13.343)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 193000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.106), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.093), ('cardinality_error', 3.847), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.847), ('loss_caption_0', 2.754), ('loss_caption', 2.759), ('total_loss', 13.499)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 194000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.165), ('loss_self_iou', 0.097), ('cardinality_error', 3.775), ('loss_ce_0', 0.262), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.769), ('loss_caption', 2.772), ('total_loss', 13.587)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 195000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.794), ('loss_ce_0', 0.261), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.751), ('loss_caption', 2.751), ('total_loss', 13.506)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 196000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.095), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.1), ('cardinality_error', 3.652), ('loss_ce_0', 0.258), ('loss_counter_0', 0.095), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.735), ('total_loss', 13.403)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 197000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.091), ('cardinality_error', 3.759), ('loss_ce_0', 0.258), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.759), ('loss_caption_0', 2.74), ('loss_caption', 2.743), ('total_loss', 13.418)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 198000 (epoch 19), +loss = OrderedDict([('loss_ce', 
0.249), ('loss_counter', 0.098), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.092), ('cardinality_error', 3.664), ('loss_ce_0', 0.255), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.718), ('loss_caption', 2.72), ('total_loss', 13.31)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 199000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.101), ('cardinality_error', 3.736), ('loss_ce_0', 0.257), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.759), ('loss_caption', 2.76), ('total_loss', 13.502)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 200000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.159), ('loss_self_iou', 0.098), ('cardinality_error', 3.701), ('loss_ce_0', 0.259), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.701), ('loss_caption_0', 2.766), ('loss_caption', 2.771), ('total_loss', 13.518)]), +time/iter = 0.190, bad_vid = 0.000 + +Validation results of iter 200180: +Bleu_1:0.16600244771432068 +Bleu_2:0.08859363359362551 +Bleu_3:0.045174799285766926 +Bleu_4:0.021453706973694267 +METEOR:0.08469975853590762 +ROUGE_L:0.1615333099598977 +CIDEr:0.3178372173219055 +Recall:0.5270524681293403 +Precision:0.5612365263371945 +soda_c:0.05852570981425518 +para_Bleu_1:0.47641872729084495 +para_Bleu_2:0.28679556025023933 +para_Bleu_3:0.1757988669447671 +para_Bleu_4:0.11061748158923715 +para_METEOR:0.1647238014039032 +para_ROUGE_L:0.3182336912910021 +para_CIDEr:0.21852415031403352 + +overall score of iter 200180: 0.4938654333071738 + +Save model at iter 200180 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 200180 to checkpoint file. 
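The experiment configs added further down in this diff are thin overlays: each sets a handful of keys (`merge_k_boxes`, `refine_pseudo_stage_num`, ...) and defers everything else to a parent via `base_cfg_path`. A plausible resolver for that inheritance, sketched here since the repository's actual loader is not shown:

```python
import yaml

def load_cfg(path):
    """Resolve a config by recursively merging it over its base_cfg_path parent."""
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    base_path = cfg.pop("base_cfg_path", None)
    if base_path is None:
        return cfg
    merged = load_cfg(base_path)  # resolve the parent first ...
    merged.update(cfg)            # ... then let child keys take precedence
    return merged

# e.g. load_cfg(".../top3_2stage_inscap.yml")["merge_k_boxes"] would resolve to 3
```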
+Save info to info.json +Best epoch: 10 + +Best Model Performance: +Bleu_1:0.1671778590456048 +Bleu_2:0.09077014613023152 +Bleu_3:0.0476684747303012 +Bleu_4:0.02445564298599047 +METEOR:0.08933235383587503 +ROUGE_L:0.1654660162888944 +CIDEr:0.31886265111118334 +Recall:0.5314017615268335 +Precision:0.5831469052945512 +soda_c:0.05853263249839839 +para_Bleu_1:0.46544090189732323 +para_Bleu_2:0.2789325258737778 +para_Bleu_3:0.17172911957785325 +para_Bleu_4:0.10903514181091935 +para_METEOR:0.16550159188298816 +para_ROUGE_L:0.3181118223429575 +para_CIDEr:0.2056618808195008 +avg_proposal_number:-1 + +Best Overall Score epoch10: 1.5812763042668414 + diff --git a/anet_clip/val.log b/anet_clip/val.log new file mode 100644 index 0000000000000000000000000000000000000000..2937f5d88e3790d388f53f3845a2179514931da2 --- /dev/null +++ b/anet_clip/val.log @@ -0,0 +1,21 @@ +Best Model Performance: +Bleu_1:0.1671778590456048 +Bleu_2:0.09077014613023152 +Bleu_3:0.0476684747303012 +Bleu_4:0.02445564298599047 +METEOR:0.08933235383587503 +ROUGE_L:0.1654660162888944 +CIDEr:0.31886265111118334 +Recall:0.5314017615268335 +Precision:0.5831469052945512 +soda_c:0.05853263249839839 +para_Bleu_1:0.46544090189732323 +para_Bleu_2:0.2789325258737778 +para_Bleu_3:0.17172911957785325 +para_Bleu_4:0.10903514181091935 +para_METEOR:0.16550159188298816 +para_ROUGE_L:0.3181118223429575 +para_CIDEr:0.2056618808195008 +avg_proposal_number:-1 + +Best Overall Score epoch10: 1.5812763042668414 diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..63d60c15c02ba592a06fc67e09c654d568891054 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..9569dff52f023f43117ca926bbde3e1f14003fdd --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..0b28f2bcdbfef92df0153ebf03faaa2bc73158a1 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c9314eb31f87ff6f0f44cbcf948b2c4224a9eafa --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa91240b87d388c80f808d8a78858fc60e197ed5 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..201c1ff4ff577f8ff9d247b699a0118d13adb728 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml 
+ + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..6a34d7ea66b7574d48f980820ae8fd055632c014 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..8f7cc5a1f6a8314a0fb47ec38587b39870114639 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..423e5ab89a334fa4ddb0234345c578eee20851cd --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..4a5acd993bcf0a0f439319aef297c4eaf9ec2b15 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..70fa415758e8d88826ce9466a1533dbf91cbcf95 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1bba6ba8d60d7c73b4a8f81a30d3bfafbcc6c1bf --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..864c3a0fc0ada3b8ae6d5c81edc5d12586d3123e --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 
+iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a34730f54add9830465c52e42fbfc9536b95a29 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..de7481364454b43fb87a1655b09d949110b25c5c --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..34eecf5b883e5b9c6f750e1e747313b6202c5291 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..61d3e52456e1ea6b0d6726c159018cade8a37d34 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 2 
+use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a07a5868fc11247094f40d8d8350e9088832eb8d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d52bdad23f666dc1d065a692affd57950d91ee5 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ba6d107bf6ea0c714bdcee68f97e22bf3d94f03 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..002f85af4093414f60b6e37e5edc14b204758ac1 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 
+gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..b3f476c95ea36a4bf987b132a41393c5d09ef19c --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..b505369a59c4e6956fc3222dea1d31be4a831ff8 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..b3dd203151d68d04d7367c1bb92602c4c9c44036 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 
+mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..fcfaef85faa02aabfc3633861e2c3b3f88ec2b66 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..97fbc46c507b17e85a2c8ec633ac1d01645609d3 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..d01e18020cd386eb7f5db9fd00662eb3992740eb --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a678cdbd0c0195b00d3315750ec658810a0bfaa --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 
+merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6c339820bdc37d7f054932b6d74615188021197d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c19e8416b340d0c95e10fd3390f640a07f7184f5 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7cf973f734fd70cea269c6a60dc0093e29bbc04 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b4b417c3f5f2d549211c84b1587f94bafa5cf9a --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk 
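Every config in this sweep is a thin override file: it names a parent via base_cfg_path (which can itself point at a further parent, e.g. the cfgs_base/howto/*.yml files) and replaces only a handful of keys such as top_frames, width_ratio, iteration, and width_th. A minimal sketch of how such layered YAML files could be resolved, assuming a hypothetical load_cfg helper rather than PDVC's actual loader:

import yaml

def load_cfg(path):
    # Read the child config, then recursively fold in its parent chain;
    # keys set in the child override the same keys in the parent.
    # Hypothetical helper for illustration, not PDVC's own code.
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    base_path = cfg.pop('base_cfg_path', None)
    if base_path:
        base = load_cfg(base_path)  # parents may chain further up
        base.update(cfg)            # child values win over the parent's
        cfg = base
    return cfg

cfg = load_cfg('cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml')
print(cfg['top_frames'], cfg['width_th'])  # expected: 40 2

Read this way, the long filenames simply spell out the overrides: topk40 corresponds to top_frames: 40, r1 to width_ratio: 1, th2 to width_th: 2, and aug(8,0.02) to pseudo_box_aug_num: 8 with pseudo_box_aug_ratio: 0.02.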
diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f144a283f2b705dfac08821fb6b1038d07d4fe7b --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..0141ba07a86d4a5fc85962f8185ca6771c0f149d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dde138a7b891c588267ba53f189063824fc4fbb --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..b8d051a83ac473fc18d14c10b8226dc414381d9c --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file 
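The refine_aug(8,0.02) variants above enable pseudo-box augmentation through three keys: pseudo_box_aug_num: 8, pseudo_box_aug_ratio: 0.02, and pseudo_box_aug_mode: random_range. A plausible reading, sketched under the assumption that each pseudo box is a 1-D temporal segment whose center and width are jittered by at most the given ratio; this is an illustration, not the confirmed PDVC behavior:

import random

def augment_pseudo_box(center, width, aug_num=8, aug_ratio=0.02):
    # Emit aug_num jittered copies of one temporal pseudo box; each copy
    # shifts the center and rescales the width by up to +/- aug_ratio.
    # Assumed semantics of pseudo_box_aug_mode: random_range.
    jittered = []
    for _ in range(aug_num):
        c = center + random.uniform(-aug_ratio, aug_ratio) * width
        w = width * (1.0 + random.uniform(-aug_ratio, aug_ratio))
        jittered.append((c, max(w, 1e-6)))  # keep widths strictly positive
    return jittered

print(augment_pseudo_box(0.5, 0.2))

With a ratio of 0.02 the copies stay within two percent of the source box, so the augmentation densifies supervision around each pseudo label rather than exploring new temporal locations.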
diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c4d48bc7e63e6428984d0bc2129742f2f7dbc262 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..7555c91df9a6110009920a7b5ac22c155cc59cfe --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..820f13ab195e62fe83b6a5c8d8086ef3ffb62b28 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..62551ec728f7b1283c495996b80d72abe2302686 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml 
b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..3a2cda8b9fa5b1093cd4327dcc407bff408a00e6 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..e270b46619490eec7d96e25950138a4e96238d6a --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..65ca7f9d880f365ced5096533819011ad152b1be --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c9d59a3f0a2f985360f987d74142d74c3ad8ce9b --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..87364862dc54a01ff7835edf337d948ef7aff565 
--- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..73c09505c83eae12bc26b6b16f8e4239aa5914d8 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..ff51f66d82ad89fac2bd3d3340d30d2d2d5c1885 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..a9391a3c8b96f98d601212fe5fee01d56fd73b2f --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..756953c9ca6e0fc91efe05cd28cd8d01f18c1700 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: 
cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..fdc8083bf18fbabbcdc25a93b1f095f6a276a544 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c6bbfc78122b62b4c6f8bb4abae8adc706e30ad --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8527b0a6296d1a970dca3691f3d31ea3dfa281d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d9d74565b11b912a681a1871360ed0bd2385ff9 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 
+merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..57f86183de530604294a363c8387c1c8b49e93af --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..de9accc5c444a46a8639dc63b3d2841b6b1fdb28 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..a600901cb9f10422e2ce532cb8c77ac03dc57959 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..29e6ac298128de00d5eafc256f3cefc35eb26585 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline 
at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..dae41f38476b77921cdb2f030d11a1f32076622b --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..3741fe96fbe15b96ea12feca4e9fa98e58b4b141 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ee94d104a9e0878da2aa2e588adeb888ff12355 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 
0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc15339112fa0ff01c5615b311bebee685e3c089 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..38f4cd642822a36efd860f20971030093b467b26 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c08068e50a1b52db2346ed7d91f994822ecb308a --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..e3d37e0af57206d3a8ace41cd93cac2d92a99aad --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml 
@@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..39fac1ab1f5ba0ef8be9166ab400b0303dab3c55 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..2342e4a5a58d9938a847f0bf11ea87de5900dadf --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..d90465e8281858d7557440ae000a2d8030b5f1be --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca08a4041f2660b9eef0f6db53a672d88bfaa52e --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..18d258a9eff34e7bbbcdebcd0462250746352d21 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..739efd5c2d9c526fac569d14c81206c02a677755 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_CLIP_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..dbc433e0a1e0d5b37361a96e3970c0d720639db4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_CLIP_pdvc.yml @@ -0,0 +1,17 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d4e2a056537258e7c06849d22d4c26c7b25e223f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_UniVL_pdvc.yml @@ -0,0 +1,17 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..72849bd96ab440b568774e6ee8a57f6ed6788162 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml @@ -0,0 +1,27 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: similarity +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..fbcd022d8623d2fa4e95c31b1f8f6adef8076c1f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml @@ -0,0 +1,27 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: similarity +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file 
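The sweeps above vary merge_k_boxes from 2 to 5 while holding merge_criterion: ins_cap_topk fixed, which suggests that the k best candidate boxes, ranked by some instance-captioning score, are fused into a single pseudo label. One way such a fusion could look; both the ranking signal and the score-weighted averaging are assumptions, not the confirmed implementation:

def merge_topk_boxes(boxes, ins_cap_scores, k=3):
    # Keep the k candidates with the highest (assumed) instance-captioning
    # score and fuse their boundaries by score-weighted averaging.
    ranked = sorted(zip(ins_cap_scores, boxes), reverse=True)[:k]
    total = sum(score for score, _ in ranked)
    start = sum(score * box[0] for score, box in ranked) / total
    end = sum(score * box[1] for score, box in ranked) / total
    return (start, end)

# Example: the two strongest of three candidate segments dominate the merge.
print(merge_topk_boxes([(0.10, 0.40), (0.12, 0.50), (0.30, 0.90)],
                       [0.9, 0.8, 0.2], k=2))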
diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa5891c2d59ff2e88a6ccbca706f7ca15f539976 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d3fff7669479f43563fb8f80dc98091b835cd49 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d00afb6f6a5f3d979ee0b513299460ed59528d71 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + 
+use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b56d4945517c6bd61351fcbef05b48c9b7448d25 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ee0f7cd39031aac12a49fa3febbad874ee84eb3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ee646dbe8a44dc5bd827e2ff354f954306512e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcc52404af176f94a0d3cecc3fcff26900f73e07 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c22d10c1f33975e94f1777fc6529b90feb81ba71 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..5a4ee8540f6cbd301e407c8fd4795518a729b2a7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e5cc7747b0917df6ff67aa7af2dcc961853d3643 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8ec8379c8cfae0384ed5c5a23cdc8ba28b250d8 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 
+cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7174b3bc97912d16e2db4f3948daf1d5ccca79d2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..047111607ce477a768f570d0c1bdbe1809fd2b27 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ec22f2c3149a6bc2a839c8b7a1e7f95f799a740b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcd7b440c4ff76e8b2cfa3324524c6a0fb7f8b3e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ceeeab0f8eddaef985f7f96f30092bf3be1c477 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml new file 
mode 100644 index 0000000000000000000000000000000000000000..b9514fabfbb681385913ea0ab7edaedaaa62b628 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..521dc511180da8f9b94569e4ab0a45844266d973 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c59ddd576b3753f4db9a137e8d3cb80dd233e0a4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbe60a5936fe763099137039a222cd0563ebaea9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9960dcaa94683c69f1c63bd6fadef7b17315ebea --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e57a1150b7f8eb19b75d32729945101e3f63970a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a639c02411b28e7b61485c8306201526a98bb30 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09300cdacf1b21dae6e6d983cc53c0c3674f146c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d933f4e367996fcd355585308151316fde844160 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9289c03643143a572b64987cf8b733118531b7a2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ecd4bd64bff3f0c9ec4c9a532146ccd657edc907 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 
+refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d228dc7e9de40c84d91a90b4e9e3accd41af0d7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6fe87609b78a2e77ad203ca2136882ba13568493 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d21448627979c6757d3299faee2ea5ab4d2d1b09 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc8c5a5dc77dae477e0f880e515993a46357a8c8 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..665f42194576bed58d657628d916710efa51b514 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 
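The anchor-based variants above all share the same augmentation switches (pseudo_box_aug, pseudo_box_aug_num: 5, pseudo_box_aug_ratio: 0.3) and differ only in pseudo_box_type. The augmentation code itself is not part of this diff; purely as a hypothetical sketch of the random_range mode used by the experiment configs earlier in the diff, jitter over a temporal (center, width) pseudo box might look like this (all names and semantics are assumptions, not the repository's implementation):

import random

def augment_pseudo_box(center, width, aug_num=5, aug_ratio=0.3):
    # Produce aug_num jittered copies of a (center, width) segment in [0, 1],
    # perturbing each endpoint by up to aug_ratio * width. Hypothetical only.
    boxes = [(center, width)]  # keep the original pseudo box
    for _ in range(aug_num):
        delta = aug_ratio * width
        start = center - width / 2 + random.uniform(-delta, delta)
        end = center + width / 2 + random.uniform(-delta, delta)
        lo, hi = max(0.0, min(start, end)), min(1.0, max(start, end))
        boxes.append(((lo + hi) / 2, hi - lo))  # clamp back to (center, width)
    return boxes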
diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc8f3897e2e398bf6cedf9c02936553098f2c73 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ac2b451a19de13876e1d0dd042878289fdaa195 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..935b0ececa15dcf4658c1e10a5ae52b93079b0fc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc.yml @@ -0,0 +1,11 @@ +id: anet_c3d_pdvc # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..e0db6b87acea5ffa66e35e868e44194a04c39852 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: anet_c3d_pdvc_gt +base_cfg_path:
cfgs_base/anet/anet_c3d_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..828311fc71fcc95e9b1a08506d11bb6ab602b665 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl.yml @@ -0,0 +1,53 @@ +id: anet_c3d_pdvcl + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..02b38b6f2dbbb53b838d9bfbab8cf268a7c02c62 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml @@ -0,0 +1,55 @@ +id: anet_c3d_pdvcl_gt + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.00001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 
+transformer_input_type: gt_proposals \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_props.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_props.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d2aa20fce1241e60ad77a69980acf1e3b653ef1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_props.yml @@ -0,0 +1,51 @@ +id: anet_c3d_props +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +train_proposal_sample_num: 15 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: none +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 0 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..147d726179a848dabb0367b22575fa2f20de4097 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc +base_cfg_path: cfgs_base/anet_c3d_pdvc.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d/', 'data/anet/features/i3d/', 'data/anet/features/vggish/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a6991e551815ec0ac234c30ab3a6d09f1bd75cf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc_gt +base_cfg_path: cfgs_base/anet_c3d_pdvc_gt.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/vggish_npy/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..f30c2ab5a626538f4dbc2c1a1bc497196ff46f24 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d3b50035fe6a95bb2a5790f8b3611be54fc0fa7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..270ae993e3f72bc2d9091b4809e8715fc6c86dae --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7340a306c59e2ac685e8c59ac2960fde366e9c7b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..16f3082ecb947291ccb4f2226312fcf3fa06d349 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c167a9b0499503b9eff84d0c1ea1aa42453cf117 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 
+top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c48adf788535e24daf8e7ffe16f2e60009118f1f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..65d5217788315daae5e6bbf002eb746b010e2bde --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9dc19b7dcbc2f364b03abc0014d17eb6375b4a99 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042d3f885aff7b261f66fa03dad252aedbf2fcf9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f6e44731cf2c826b91b8173148e120b64d04f66 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e0f151d036988969e902c436085fa850bb50a4d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e44527dea994822b026b814df9b354aff082b53 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..08f52e1ac3b1ebca5e2d62c639fd5f6b5752ddf9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bd202e1359b3d756d903c93acf07e4dad268323e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3fe14dc05390932f58a3f5ce8b3ffa3828296200 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..211b0adc17f02c1b64ce3ceff2c0122c7581eada --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..674edd157c6ffe26bd7e9248faffc1a68a997d35 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..20243b5ac8be187c91a7e54eb86cc27db6f21559 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cb675d9549b34cf0ee2258d3e6a107273d1e4ffd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..586821983af378162d355b298b2788e0c651e0e6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..42476cbf72bb3d8b304e89192a30bda1606046aa --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0a8e97b44c541a99afc965764187cf264bd4268 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9e0ce1a0db8dd453b811f6f0d5609f8ae7648a6d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8725e365b44b967e794cdc16423a571c71e33bd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2fd6450a47d8392d35b67b997ec6173f35b6ee4b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5cd314fb9fd20fa05b1b5417604b40d115ee008e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..23e3dae52ca63a879083d7eafc8c1ab7e1556d71 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4a108d38d2c512864344770d5943e439eb151d5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml new file mode 
100644 index 0000000000000000000000000000000000000000..c30ad37380e6b9ef2b845061471ab2a4ff293d91 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b140662d119689dca4e409f85da91e882323bc0f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5e03028433a6bbc1c25ae936d791096e8e7826b2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + 
+caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ad1160b06416b51bd4e728eef5e6225f023796c0 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..f56ac030b287f6c0a806833b546d90a9d8fe9670 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..79f9caa36975efda224cb605af412efda721e7dc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml 
b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..c748cd44f1b9ea7607e4482da4af8444347d3f88 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..5543e4e259942b72d98f1fe16cd4311be93ef3c7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9804be364f78a4a8f26e30e0e6923558194edcd9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c4ef82922a7df99f37d1a626d4a89e8c9b95722 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..df92966691ed0fa33bc4b7417f6c0ade5b383869 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvcl.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..c5298c707ab8887be611c86d522e855a8a5123a4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet.yml 
b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..d83ae5b6762ddc39bcdab2aedddf47a6ed8571d3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..889d543d5b813b8a574700f9ad209fd237144075 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_mixlm_anet.json +vocab_size: 18884 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 
+transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a7bf2a745aecc0b05232f717c81a97333ee55af3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_puyu_anet.json +vocab_size: 15249 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..17b3bd0263edd713fc329bf1df7b539e2f160b3d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file_for_sim: 
data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..36d19db653936c2342b12bfc603de32b2295e287 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_mixlm_yc2.json +vocab_size: 17447 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..0f9ec30bf455a8a9d51bb867bdbc8e4d514c8006 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: 
['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2_puyu.json +vocab_size: 13411 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto_anet.yml b/yc2_univl/backup/cfgs_base/howto/base_howto_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..3deec04627b419ff129a14bcf6ef5f8382bca7af --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 + + + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto_yc2.yml b/yc2_univl/backup/cfgs_base/howto/base_howto_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..85343a3924a24e42054f963f220b2a3e93769070 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto_yc2.yml @@ -0,0 +1,62 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 
+invalid_video_json: [] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file: data/howto/vocabulary_howto_rate2.json +# vocab_size: 14432 + + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..92c6b64a0b9a276122b86cabe3ad428fa8fd6c8a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..87fd2de7d0e97619b3774084d4de97485ac0eedd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage 
+base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..dfb930405a3050a89e929c9219635231b546d3cb --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..15f8524313eeff1620c67764a09bb5268d50c249 --- /dev/null +++ 
b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..da4400435c82abc6bc70c758bdbb8d52b9d68cc2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c19e4b987a23b04a75b0eba01abfe0de360783c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6141c44f52d807457f9cf0c759ae34f0ce6c024c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 
+att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..57427fb4f57b5c5b3518de52811ee7d31f799017 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f45381d945d502f5a7d421ac2e2d17a7abcd5d87 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: 
standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..351d2d32411e886fb1dbbe52674c262b39e1ca77 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..93a5cabefb35f8f40517cd74d3cee811008d4659 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 
+width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..691b7faef9301ea5d8b205f226ecdbfe7f0618c9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..73f5f0634fa1d4b00a2fb49f1793d72e67c16c87 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 
+hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..6b3775fd1a72294b76078cdc49baf850ed5056ca --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..078379e88f4811421e58a8d7930e932ca6641e24 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a98bb3237ea032ebfca52ae34e59d88aa3592ffa --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..48afb4f1afd1ca36d3e0c6689d2e7d562099cbeb --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f3192d4aa5b4456deba35f0f4e404a2b11fa7e00 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b6b1224667dae251754c76aeaccbf93a63893e54 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..62f4d92634f0885f13107332cc7e23b824ff8596 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5545fc09291a7589f59c8725f38a17144f43f826 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c202054206ca67c6885cf4a34bf12a2c9eb163ef --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..64486f4d57b00de9c7b0c403ff49749c7cccee4a --- 
/dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebd15f0c193ea72d878905401027fd28a330a6e9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dd87d3485fe65777b79be423ffe881796fe879 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..29489663091e3ca5546b6d55f9937dc01ff97a8e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 
+contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9bf87a23994a2adb512b6fb1d1a2188f70de82a6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..271b44b03d385f7ad93fdfff959e8b41486451df --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 
1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..bf6ced9cc86151f3c804201b38a127a8cb2f5381 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9779a1898513b16aa1c2aaa57f4eb6255b2f9253 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..996e5360fc812f1943959c0bc468974117a97a94 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..472fdf294e8ba71f16708011d3d8529e5b32800c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: 
similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..c56573da93d2d948eae7d723904a9243498ac484 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4940aa43eec6d5fcb08905bac7892f10b28c7549 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..9505834e0b888345ad6b73283848e9717a58ac98 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f373f5e6dfc0a586f104777611c18ad10dcddb9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..e28db709d1590f546524626df9b13676034d0489 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..753fdd06b9fbb789f5f5215ce9ae73d2f694bf12 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f8bf9b58b304ea4bc7becf5368d3025b192e5ea5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c56556156aa50810c468bb22a2d00ace2213860 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ca13a378e8eacef7f262ae789b9ffa7346c34e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ca13a378e8eacef7f262ae789b9ffa7346c34e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + 
+visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml new file mode 100644 index 
0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ac92f31028eece0c85e6cc642876b3fee015063 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..cae9be1bd91a74c7b263399c84dd4e0ff80849b4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..798dc7b939e23386a864ee0ac3f53f2628b7138d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + 
+self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b5ab9e0bdfca3c12d8c932e52a0e0e20bf6e759a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..36061ccd68f650aef565bc8ce8a31be53eebf41a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + 
+use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..54a0ceb172e743011c5a9183cbc31c39d3084019 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e32f2b3092a0f11c35bf4a2ab0ab62172c23815 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9205bfd2568d41ec26689b63fcdb82f30f1e0c7b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,48 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + + + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ed0b046f4922fec52231c7c0d5c551cff82f0cc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + 
+self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b40c179f05dff58d76a601a232e6fada42f29505 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..32e237518518df60263b914b65d18dba1d0b8f46 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml 
b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1dcb3b5b547fda7ed525234c4f97976badc60c04 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6f775d16de8d0ca478c89310cf56ae9cc12b6d6e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..04d7bda3fde5f365542bbd033cee609457c89604 --- /dev/null +++ 
b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebb97f21d137b0826c96dfea0a86e234195d39be --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..47aa1ebe8a83f7375281ca97c736b47b312d7806 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86ad0779edfff76fd33862932cd3b534902be794 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..11cbc212d5837924bc8afb7d4635427be51ee216 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..0dcf153ed24336b039f3e288035937de41f36a94 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..424cf659ab8edb6fc4b81364ce47a2d568f65c07 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 
+pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..53a57713fe15fb61233549dc5ecb309e986a5508 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..13ca9fd266ebd75d23197a8e51ad913227640b06 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 
+window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..f5f9a193a786aad4960447288edf675aecb58129 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dbabf6d032f5848991215aec06c7fcef0fb711 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7a064f9627e6d2f00667c740f27f3564ce16d63 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml @@ -0,0 +1,32 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: align +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..ef7c8924196f230fbf7abf4bd2aada4d8c813275 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..355c7d66081451b3513304f8b81126fd1976f918 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5121b6cf76c3b4966c382f4cfca11c71891a721c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 
+cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a95a0e782daf8c2f799f90d93aa95a6eceb015b2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..59967b4b1bc17087eda8b17e520c3e83e153e699 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..178db2ad770c80e3d3d365842e7df6f2a24c0fee --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 
+pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7555f552813b710ea274c5068b4b4b46b15e00e8 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..726c043ff0300b678776e8391d1bb3ed962307cf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..71be5f3eb1240f38e131b7600e58cf1a1b3f7b3d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc03cac62cb816ce7b98e9fcf4ff96167d03b682 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d054ac67dc4a7765def67b554b1c520c04cd18da --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..27e89d7b820b566c825bf2be4e6164f218664588 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..39dfc692b19e2acf130606f314214410f0720990 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..fe6cba647bb5d104b990a3415706d56971426b1f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 
1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..05c7d7053dd25cb4a3e06340268c29a14d7dc05e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..40fd330d45cf07a8e34c442d56c1339fa237c5e1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd4ead981978c54801c70e6dec9fc4154f6fcf40 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86b411963f0ab337fa843b01a49b743c55d0f4ba --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4b959243136e53599aa026ae7a386d9ae8ce41a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml 
b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..259b30520a7d6eb411b14497d97e2b47939933ae --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..59ad8e64a36d46297e604044ab9fa8c0fe8bf3e1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3f8a497a405c767f138ba08ec811b417d62e5674 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..0aebca8432993dd31f123e618c40a701da8ed968 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..29a1b716404048c53360c2e95eb69510f03eb1e9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..9c2142a150ea00790b8c18556d0004bfa89afae8 --- /dev/null +++
b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b624be12c30a6132baf378a696b5d85949754b5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..3aedf7494857125480dd5c5dc50bd3a5cf9f4260 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP
+disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5104cde9760d48a7f9fa23cb1a04b825c18f3844 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8bca42c4f97b02f9f9bb5f73f37c7660a2c17f64 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f75a379f584c0e1c6937f7382106b41aa13de36b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..70bb98540afb82ad228fd38386b705eac0186b43 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..377d0315f95c91a695c3452a623a2178f36f8b5e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml 
b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a43f4fbb5db27f501c2e18f01a4ce609982ca832 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..edb891d5da98423d456c7b80ab35cc9a50143577 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..b51463e36ec85d9216a6644801548bae016228fe --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..349183f42fc665a103e3b1b628b6f055bfbaee2d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..87138b61dc4b554deebc0245686a152f593825fc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file: 'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/voc_tasty_14.json +vocab_size: 14670 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 # set below 42, the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml new file mode 100644
index 0000000000000000000000000000000000000000..1a82a7274fb3f956c8545096bdf86e3e1f9c0468 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file: 'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/vocabulary_tasty.json +vocab_size: 30171 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 42 # 42 is the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml b/yc2_univl/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..d14b206d40c4f2399400913d1ff15b8659b575b9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/vlep/captiondata/vlep_meta.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/vlep/vlep_vocabulary_rate2_yc2.json +vocab_size: 4491 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cccf2ec29dc7c513d8b4cc90d4a6f6a3fabbc28d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,44 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/vlep/base_vlep-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/output/vlep_clip_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/vlep/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..00399b0c2f2a021a7476750b003026045d776cd1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml @@ -0,0 +1,20 @@ +id: yc2_UniVL_pdvc +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] + +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..ab29c8850e08c4549496f768a65f7ff4d08f33ba --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml @@ -0,0 +1,19 @@ +id: yc2_ViP_pdvc_norm +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text/'] +feature_dim: 512 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8ecdedb6ef1d7787c74040ef64b644bfb98d956 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml @@ -0,0 +1,29 @@ +id: yc2_abox_ViP_pseudo_similarity +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text'] +feature_dim: 512 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_type: similarity +use_anchor: 0 +pretrained_language_model: CLIP-ViP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c15a2a7c6e8dea1452f91f26dda6fe9fb6ebe8f7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..217693d7afa3593227e2d368fdb2552cd9371369 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + 
+num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..13eef191acd1177c2d4e7bdc64f2b755e80ab5e5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..88bd8b2fec5674da63b5171fa4c7bfeee0426fc9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4846c148573235eb6aa047cb024e6c78a4e1cba2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 
+merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4b329f14bec0848ba829c1b4048e7fe22fb46e83 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d62fd00faf076f269351f19da5083817b419ff0 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e25901b1bf82142d41e05343f65e1ba59b8d908b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..77143cc57f11432ea5001da37c0014eb1696acc2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..87ff91cad79953a42bb1b582e8170779d1e147c7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml new file 
mode 100644 index 0000000000000000000000000000000000000000..388cbc2b2527190f4ccf17c7006ec9adee33ae5e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8b7d7cecca9a90d08a1fe516e115891645fb0b6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c49641fdda1e81d5e713e9b615d3d1186a7fea7d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + 
+caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..dbb37176adcdc33825abe0ed4a943588ad157f4a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b271a74c0f4f3d976410fcc2b607d12056124d08 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d91010f653fe7c8f52caead5e7737a8ea102fd9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..bed129c0b6368d63e430310d7caa0ce4a633e329 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca26e6a13b9df3b80e91cf34b8668676895d6214 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 
index 0000000000000000000000000000000000000000..c5086a3c1ab5482fd8595d66ebd5cecd3da4502c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ec5cdeb72ada972c4b0ec906ed3d96060a7e018 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d56829b138e152c73a8513f85f3f086c40ae838 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042ae96b01a2c741fe1304441db5bd0ca14109b4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e9f47c5587ae99e197c9e4c2d87ccc4545923143 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..78f0529a85faecb3ca48833e213e1be64072439a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..615d745a91273d92d158ecbd28e9eb7e5ec77640 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f2fe9580c80c0a6c19ebd865cc9625a7f3b997ec --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a3866db1b5556c8b1c7a3f37a37686bdc2170a13 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..707926c6592afdac874f5ba9bafb7c3855e9b18e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbc2e0503505aa158c62d8f3687cce87a45ece37 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim 
+use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ff93b47407b09e7af54d101c6dfcab1d359c1d3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8bdddf2857f8baee722f798460eee584a75e07e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..35cf1c1bee6180fdd84cf580e3266c24f80bfb2b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c68cd4381b5ddc5447b4da98c968702fcfad52d2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1dcf629977378040564bf4b9256f66fe8a76282 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..6768351e467f9cdcfb4cf503621faee1d28da7d5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..48c7d7ff81d64b9b885be7e8c9c114d5b3a177c6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a2880d6336436d6d2da1330e3835323340f5ddd7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 
+soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..96d7bc4b0db3ea7647ac39f487d43fe96d1b9846 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dd2d07dd9d060137eeb3f135a7e2a420d110fdf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6043b9b8e94f7f09c976fd10fd340ce0370dbc21 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef3bd5c6fccf6e8fa460e56fbc89d5cda0ad5a16 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ba9e35266c5ecce655d98fbda735fffd5505ab3c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..373ef4c276a633c7923d9cdf106d81863573f378 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e6e781eb010da1f9c67a8362776119348f972795 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c2b90ed1817ba9b5af6f71c7aa48499550fed905 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + 
+self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..71cdb3e5f6a305eeefcf987c447ad38ce2c0b9eb --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..14e4add2192671c62c3c5ecab34be01db392ead2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..658a48fbf1a31ce7522f65e4f04453965f1a1130 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 
+pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8c2448ad587b99db12d4004259da8ad83c400547 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca9efe1df117e81a57d9f4c0b37dccef8ce071a4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..1ae2b3a1874058268562185df28b4568088518bd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f416921a9d59d98d643118774215085f50e4adf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a43d8c6e42cb71191f4db57266b12c2f8ff1df7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 
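The pseudo_box_aug_num / pseudo_box_aug_ratio pair recurs across these variants: when pseudo_box_aug is on, each pseudo box is duplicated with jittered boundaries. A sketch of one plausible jitter scheme; the function name and the uniform, width-proportional noise model are assumptions rather than the repo's exact logic:

import numpy as np

def augment_pseudo_boxes(boxes, aug_num=5, aug_ratio=0.3, seed=None):
    # For each (start, end) pseudo box, emit aug_num copies whose endpoints
    # are shifted by up to aug_ratio of the box width.
    rng = np.random.default_rng(seed)
    out = []
    for start, end in boxes:
        shift = (end - start) * aug_ratio
        for _ in range(aug_num):
            ds, de = rng.uniform(-shift, shift, size=2)
            s = max(0.0, start + ds)
            out.append((s, max(s, end + de)))  # keep the box non-degenerate
    return out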
+ +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ce8c21fb91989e7b37c1504989be65eaa5c17c1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..25c75deb4061ea7fe6c59f5d63d26b85c332f8ce --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c1a0e79d75681650dce5753ea1934f39e05931ba --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + 
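The similarity_op_order_v2 family that begins here adds width_th (and, in the refine variants, top_frames, window_size and statistic_mode). The code consuming these keys lives elsewhere in the repo; purely as an illustration of the top-frames idea, a caption's pseudo segment can be read off its caption-to-frame similarity row as the span covered by the most similar frames, with width_th presumably rejecting implausible spans:

import numpy as np

def span_of_top_frames(sim_row, top_frames=10):
    # sim_row: one caption's similarity to every frame, shape [T].
    # The pseudo segment is the span covered by the top_frames best frames.
    top_idx = np.sort(np.argsort(sim_row)[-top_frames:])
    return int(top_idx[0]), int(top_idx[-1]) + 1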
+visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e6f5990069b64bd922d068c9721144ee3ed8467 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a061bab74963587e8c148e67c47ba3e8d8e7bdc9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 
+cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6cf3256fca24a4cdaa0c9e8c89a4fed74edd684b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e13936ee7c32a85bb8553de1bddf8dc85f6acdd7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..31d2af3cfa6d15ec3ce1067648dcb89cf410d309 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + 
+use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1d5096ca8903c240faa1e45589e77786800bf4d9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c04663ab87edcd38ea5a0fa00c17d99f3203fc02 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca1d85d39c264126d0cdcad3fb4ef6dcd9d78249 --- /dev/null +++ 
b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e431de6395e6a745cf9c9e5f560621c1ef911015 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d4b9dec009027bfad79d96f94545c19465146271 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 
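For the weight_index / weight_sim families above, one natural construction (illustration only, not a claim about the repository's exact formula) treats normalized similarities as weights over frame indices and reads a (center, width) box off their first two moments:

import numpy as np

def weighted_box(sim_row):
    # Softmax the similarities into weights, then take the weighted mean
    # index as the center and a multiple of the weighted spread as the width.
    w = np.exp(sim_row - sim_row.max())
    w /= w.sum()
    idx = np.arange(len(sim_row))
    center = float((w * idx).sum())
    width = 2.0 * float(np.sqrt((w * (idx - center) ** 2).sum()))
    return center, width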
+ +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..e66d364a384fa467573af7703d97baf23098c9a2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..60d558dd0b51cbe8d184681d7227c91e76246540 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc66b3cbff2550bf0264a79dd43d6b93ab7256a0 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml @@ -0,0 +1,13 @@ +id: yc2_tsn_pdvc +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9a1c528c5c792081cbb4873983306c4268a23d55 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: 
yc2_tsn_pdvc_gt +base_cfg_path: cfgs_base/yc2_tsn_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml new file mode 100644 index 0000000000000000000000000000000000000000..79ef87700f600af96cb41f1953b4fb1da336c8ec --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml @@ -0,0 +1,16 @@ +id: yc2_tsn_pdvc_prior +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml new file mode 100644 index 0000000000000000000000000000000000000000..14941f50b2699cc25e74ee388bfe086ae0bda74d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml @@ -0,0 +1,18 @@ +id: yc2_tsn_pdvc_prior_add +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +prior_manner: add + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..1420f8abf88d8bbdd6c9cf05454f0949a9fb6c44 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml @@ -0,0 +1,55 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..435e85fc3946b15c389de755987b34f8bd75d469 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml @@ -0,0 +1,57 @@ +id: 
yc2_tsn_pdvcl_gt + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.0001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +#max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 +transformer_input_type: gt_proposals \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 
100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/change_config_add.py b/yc2_univl/backup/change_config_add.py new file mode 100644 index 0000000000000000000000000000000000000000..4b9ecff04cf568dba78df9a67a4a418abc9edf08 --- /dev/null +++ b/yc2_univl/backup/change_config_add.py @@ -0,0 +1,80 @@ +import os +import yaml +import argparse + +# add dryrun option +parser = argparse.ArgumentParser(description='Change config files') +parser.add_argument('--dryrun', action='store_true', help='dryrun') +args = parser.parse_args() + + + + + +# Define the folder containing YAML files +# folder_path = 'cfgs_ref' +# folder_path = 'cfgs_base/anet' +# folder_path = 'cfgs' +folder_path = 'cfgs_yc2_ft_perc' + +file_filter = '' + + + +# Define the string to find and the replacement string +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video' +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj' +# find_string = 'data/yc2/captiondata/yc2' +# find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text" +find_string = "ft_gt_percent: 0.25" +# find_string = "pdvc_mode: 0" + +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual' +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text' +# replace_string = 'data/tasty/captiondata/tasty' +# replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml" +replace_string = "ft_gt_percent: 0.75" +# replace_string = "pdvc_mode: 1" + +old_name = 'perc0.25' +new_name = 'perc0.75' + +def replace_yaml(yaml_file_path, new_file_path, old_string, new_string): + # Read the YAML file as text + with open(yaml_file_path, 'r') as file: + yaml_text = file.read() + + # Replace a string (e.g., 'old_string') with another string 
(e.g., 'new_string') + + yaml_text = yaml_text.replace(old_string, new_string) + + # Save the modified text back to a YAML file + with open(new_file_path, 'w') as file: + file.write(yaml_text) + + # # Load the modified YAML data (optional) + # modified_yaml_data = yaml.safe_load(yaml_text) + +# You can now work with the modified_yaml_data as needed + +filelist = os.listdir(folder_path) +# Iterate over the files in the folder +for filename in filelist: + if file_filter not in filename: + continue + # breakpoint() + if (filename.endswith('.yaml') or filename.endswith('.yml')) and old_name in filename: # parentheses matter: 'and' binds tighter than 'or' + # breakpoint() + file_path = os.path.join(folder_path, filename) + if old_name == '': + new_filename = filename.replace('.yml', '_{}.yml'.format(new_name)) + else: + new_filename = filename.replace(old_name, new_name) + new_file_path = os.path.join(folder_path, new_filename) + + if args.dryrun: + print("Dryrun: {} -> {}".format(file_path, new_file_path)) + else: + replace_yaml(file_path, new_file_path, find_string, replace_string) + +print("String replacement completed.") \ No newline at end of file diff --git a/yc2_univl/backup/demo.py b/yc2_univl/backup/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e3ab4946905f140f377d120a14deff85f4622f --- /dev/null +++ b/yc2_univl/backup/demo.py @@ -0,0 +1,44 @@ +import numpy as np + +# Example similarity matrix with shape [10, 200] +similarity_matrix = np.random.rand(10, 200) + +# Example range of indices for each step (stored in center and width arrays) +center = np.random.randint(0, 100, size=(10,)) +width = np.random.randint(10, 20, size=(10,)) + +# Calculate the start and end indices for each step +start_indices = np.clip(center - width // 2, 0, similarity_matrix.shape[1]) +end_indices = np.clip(center + width // 2, 0, similarity_matrix.shape[1]) + +# Generate column indices for each range +col_indices = np.arange(similarity_matrix.shape[1]) + +# Get topk values and corresponding indices +topk = 5 +topk_values = [] +topk_indices = [] + +for start, end in zip(start_indices, end_indices): + # Slice the similarity matrix within the specified range + range_values = similarity_matrix[:, start:end] + + # Find the indices of the topk values within the range + sorted_indices = np.argsort(range_values, axis=1)[:, -topk:] + sorted_indices += start # Adjust indices to the absolute position + + # Flatten and concatenate the indices + row_indices = np.arange(len(sorted_indices))[:, np.newaxis] + indices_flat = np.ravel_multi_index((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()), similarity_matrix.shape) # row indices must be repeated to match the flattened top-k columns + + # Append topk values and indices + topk_values.append(np.take(similarity_matrix, indices_flat)) + topk_indices.append(np.column_stack((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()))) + +# Convert lists to arrays +topk_values = np.array(topk_values) +topk_indices = np.array(topk_indices) + +print("Topk values within the specified range:", topk_values) +print("Topk indices within the specified range:", topk_indices) + diff --git a/yc2_univl/backup/eval.py b/yc2_univl/backup/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c59801e0e5a9e72ce22521699e53d796efd49b --- /dev/null +++ b/yc2_univl/backup/eval.py @@ -0,0 +1,146 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import json +import os +import sys +import torch +import numpy as np +import time +from os.path import dirname, abspath +
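Before eval.py continues, a note on demo.py above: each pass through its loop gathers top-k entries from every row of similarity_matrix for one window, although the per-step center and width arrays suggest one window per row. Under that reading (an assumption about the demo's intent), a per-row version is shorter and needs no index raveling; it reuses demo.py's variables:

topk_values, topk_indices = [], []
for row, (start, end) in enumerate(zip(start_indices, end_indices)):
    window = similarity_matrix[row, start:end]
    local = np.argsort(window)[-topk:] + start   # absolute column indices of this row's top-k
    topk_indices.append(local)
    topk_values.append(similarity_matrix[row, local])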
+pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + +from eval_utils import evaluate +from pdvc.pdvc import build +from misc.utils import create_logger +from data.video_dataset import PropSeqDataset, collate_fn +from torch.utils.data import DataLoader +from os.path import basename +import pandas as pd + +def create_fake_test_caption_file(metadata_csv_path): + out = {} + df = pd.read_csv(metadata_csv_path) + for i, row in df.iterrows(): + out[basename(row['filename']).split('.')[0]] = {'duration': row['video-duration'], "timestamps": [[0, 0.5]], "sentences":["None"]} + fake_test_json = '.fake_test_json.tmp' + json.dump(out, open(fake_test_json, 'w')) + return fake_test_json + +def main(opt): + folder_path = os.path.join(opt.eval_save_dir, opt.eval_folder) + if opt.eval_mode == 'test': + if not os.path.exists(folder_path): + os.makedirs(folder_path) + logger = create_logger(folder_path, 'val.log') + if opt.eval_model_path: + model_path = opt.eval_model_path + infos_path = os.path.join('/'.join(opt.eval_model_path.split('/')[:-1]), 'info.json') + else: + model_path = os.path.join(folder_path, 'model-best.pth') + infos_path = os.path.join(folder_path, 'info.json') + + logger.info(vars(opt)) + + with open(infos_path, 'rb') as f: + logger.info('load info from {}'.format(infos_path)) + old_opt = json.load(f)['best']['opt'] + + for k, v in old_opt.items(): + if k[:4] != 'eval': + vars(opt).update({k: v}) + + opt.transformer_input_type = opt.eval_transformer_input_type + + if not torch.cuda.is_available(): + opt.nthreads = 0 + # Create the Data Loader instance + + if opt.eval_mode == 'test': + opt.eval_caption_file = create_fake_test_caption_file(opt.test_video_meta_data_csv_path) + opt.visual_feature_folder = opt.test_video_feature_folder + + val_dataset = PropSeqDataset(opt.eval_caption_file, + opt.visual_feature_folder, opt.text_feature_folder, + opt.dict_file, False, opt.eval_proposal_type, + opt) + loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn) + + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = val_dataset.translator + + + + if not os.path.exists(model_path): + raise AssertionError('File {} does not exist'.format(model_path)) + + logger.debug('Loading model from {}'.format(model_path)) + loaded_pth = torch.load(model_path, map_location=opt.eval_device) + epoch = loaded_pth['epoch'] + + # loaded_pth = transfer(model, loaded_pth, model_path+'.transfer.pth') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + + model.to(opt.eval_device) + + if opt.eval_mode == 'test': + out_json_path = os.path.join(folder_path, 'dvc_results.json') + evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=True) + + + else: + out_json_path = os.path.join(folder_path, '{}_epoch{}_num{}_alpha{}.json'.format( + time.strftime("%Y-%m-%d-%H-%M-%S_", time.localtime()) + str(opt.id), epoch, len(loader.dataset), + opt.ec_alpha)) + caption_scores, eval_loss = evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=False) + #
breakpoint() + avg_eval_score = {key: np.array(value).mean() for key, value in caption_scores.items() if key !='tiou'} + # avg_eval_score2 = {key: np.array(value).mean() * 4917 / len(loader.dataset) for key, value in caption_scores.items() if key != 'tiou'} + + # logger.info( + # '\nValidation result based on all 4917 val videos:\n {}\n avg_score:\n{}'.format( + # caption_scores.items(), + # avg_eval_score)) + + logger.info( + '\nValidation result based on {} available val videos:\n avg_score:\n{}'.format(len(loader.dataset), + avg_eval_score)) + + logger.info('saving results json to {}'.format(out_json_path)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--eval_save_dir', type=str, default='save') + parser.add_argument('--eval_mode', type=str, default='eval', choices=['eval', 'test']) + parser.add_argument('--test_video_feature_folder', type=str, nargs='+', default=None) + parser.add_argument('--test_video_meta_data_csv_path', type=str, default=None) + parser.add_argument('--eval_folder', type=str, required=True) + parser.add_argument('--eval_model_path', type=str, default='') + parser.add_argument('--eval_tool_version', type=str, default='2018', choices=['2018', '2021']) + parser.add_argument('--eval_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--eval_proposal_type', type=str, default='gt') + parser.add_argument('--eval_transformer_input_type', type=str, default='queries', choices=['gt_proposals', 'prior_proposals','queries']) + parser.add_argument('--gpu_id', type=str, nargs='+', default=['0']) + parser.add_argument('--eval_device', type=str, default='cuda') + parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all']) + opt = parser.parse_args() + + #breakpoint() + + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + if True: + torch.backends.cudnn.enabled = False + main(opt) diff --git a/yc2_univl/backup/eval_utils.py b/yc2_univl/backup/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd727ecebd0364fe9ad45d94f582fdcb17d54b --- /dev/null +++ b/yc2_univl/backup/eval_utils.py @@ -0,0 +1,241 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import collections +import torch +import numpy as np +import json +from collections import OrderedDict +from tqdm import tqdm +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) + + +from densevid_eval3.eval_soda import eval_soda +from densevid_eval3.eval_para import eval_para +from densevid_eval3.eval_dvc import eval_dvc + +def calculate_avg_proposal_num(json_path): + data = json.load(open(json_path)) + return np.array([len(v) for v in data['results'].values()]).mean() + +def convert_tapjson_to_dvcjson(tap_json, dvc_json): + data = json.load(open(tap_json, 'r')) + data['version'] = "VERSION 1.0" + data['external_data'] = {'used:': True, 'details': "C3D pretrained on Sports-1M"} + + all_names = list(data['results'].keys()) + for video_name in all_names: + for p_info in data['results'][video_name]: + p_info['timestamp'] = p_info.pop('segment') + p_info['proposal_score'] = p_info.pop('score') + p_info['sentence_score'] = p_info.pop('sentence_score', 0) +
data['results']["v_" + video_name] = data['results'].pop(video_name) + json.dump(data, open(dvc_json, 'w')) + + +def convert_dvcjson_to_tapjson(dvc_json, tap_json): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + event_num = len(data[video_name]) + timestamps = [data[video_name][i]['timestamp'] for i in range(event_num)] + sentences = [data[video_name][i]['sentence'] for i in range(event_num)] + for i, timestamp in enumerate(timestamps): + score = data[video_name][i].get('proposal_score', 1.0) + video_info.append({'segment': timestamp, 'score': score, 'sentence': sentences[i], 'sentence_score': data[video_name][i].get('sentence_score', 0)}) + out['results'][video_name[2:]] = video_info + json.dump(out, open(tap_json, 'w')) + + +def convert_gtjson_to_tapjson(gt_json, tap_json): + data = json.load(open(gt_json, 'r')) + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + timestamps = data[video_name]['timestamps'] + sentences = data[video_name]['sentences'] + for i, timestamp in enumerate(timestamps): + video_info.append({'segment': timestamp, 'score': 1., 'sentence': sentences[i]}) + out['results'][video_name[2:]] = video_info + with open(tap_json, 'w') as f: + json.dump(out, f) + + +def get_topn_from_dvcjson(dvc_json, out_json, top_n=3, ranking_key='proposal_score', score_thres=-1e8): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + all_names = list(data.keys()) + num = 0 + bad_vid = 0 + for video_name in all_names: + info = data[video_name] + new_info = sorted(info, key=lambda x: x[ranking_key], reverse=True) + new_info = [p for p in new_info if p[ranking_key] > score_thres] + new_info = new_info[:top_n] + out['results'][video_name] = new_info + num += len(new_info) + if len(new_info) == 0: + bad_vid += 1 + out['results'].pop(video_name) + print('average proposal number: {}'.format(num / len(all_names))) + print('number of bad videos: {}'.format(bad_vid)) + print('number of good videos: {}'.format(len(out['results']))) + with open(out_json, 'w') as f: + json.dump(out, f) + + +def eval_metrics(dvc_filename, gt_filenames, para_gt_filenames, alpha=0.3, ranking_key='proposal_score', rerank=False, dvc_eval_version='2018', transformer_input_type='queries'): + score = collections.defaultdict(lambda: -1) + # top_n = 3 + # top_n_filename = dvc_filename + '.top{}.json'.format(top_n) + # get_topn_from_dvcjson(dvc_filename, top_n_filename, top_n=top_n, ranking_key=ranking_key) + # dvc_score = eval_dvc(json_path=top_n_filename, reference=gt_filenames) + # dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + # dvc_score.update(eval_soda(top_n_filename, ref_list=gt_filenames)) + # dvc_score.update(eval_para(top_n_filename, referneces=para_gt_filenames)) + # for key in dvc_score.keys(): + # score[key] = dvc_score[key] + if transformer_input_type == 'prior_proposals': + dvc_score = eval_para(dvc_filename, referneces=para_gt_filenames) + score.update(dvc_score) + #breakpoint() + return score + + else: + if rerank: + dvc_filename = reranking(dvc_filename, alpha=alpha, temperature=2.0) +
dvc_score = eval_dvc(json_path=dvc_filename, reference=gt_filenames, version=dvc_eval_version) + dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + dvc_score.update(eval_soda(dvc_filename, ref_list=gt_filenames)) + dvc_score.update(eval_para(dvc_filename, referneces=para_gt_filenames)) + score.update(dvc_score) + return score + + +def save_dvc_json(out_json, path): + with open(path, 'w') as f: + out_json['valid_video_num'] = len(out_json['results']) + out_json['avg_proposal_num'] = np.array([len(v) for v in out_json['results'].values()]).mean().item() + json.dump(out_json, f) + +def reranking(p_src, alpha, temperature): + print('alpha: {}, temp: {}'.format(alpha, temperature)) + d = json.load(open(p_src)) + d_items = list(d['results'].items()) + for k,v in d_items: + if True: + sent_scores = [p['sentence_score'] / (float(len(p['sentence'].split()))**(temperature) + 1e-5) for p in v] + prop_score = [p['proposal_score'] for p in v] + joint_score = alpha * (np.array(sent_scores)) + (np.array(prop_score)) + for i,p in enumerate(v): + p['joint_score'] = joint_score[i] + v = sorted(v, key=lambda x: x['joint_score'], reverse=True) + topN = v[0]['pred_event_count'] + v = v[:topN] + v = sorted(v, key=lambda x: x['timestamp']) + d['results'][k] = v + save_path = p_src+'_rerank_alpha{}_temp{}.json'.format(alpha, temperature) + save_dvc_json(d, save_path) + return save_path + + +def evaluate(model, criterion, postprocessors, loader, dvc_json_path, logger=None, args=None, score_threshold=0, + alpha=0.3, dvc_eval_version='2018', device='cuda', debug=False, skip_lang_eval=False): + out_json = {'results': {}, + 'version': "VERSION 1.0", + 'external_data': {'used:': True, 'details': None}} + opt = loader.dataset.opt + + loss_sum = OrderedDict() + with torch.set_grad_enabled(False): + for dt in tqdm(loader, disable=opt.disable_tqdm): + # valid_keys = ["video_tensor", "video_length", "video_mask", "video_key"] + # dt = {key: value for key, value in dt.items() if key in valid_keys} + dt = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt = collections.defaultdict(lambda: None, dt) + + dt['video_target'] = [ + {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # output, loss = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + output, _ = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + orig_target_sizes = dt['video_length'][:, 1] + + weight_dict = criterion.weight_dict + # Huabin comment this line (anything about 'loss') to avoid reporting losses during evaluation + # final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + + # Huabin comment this line to avoid reporting losses during evaluation + # for loss_k, loss_v in loss.items(): + # loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item() + # loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + results = postprocessors['bbox'](output, orig_target_sizes, loader) + + batch_json = {} + for idx, video_name in enumerate(dt['video_key']): + segment = results[idx]['boxes'].cpu().numpy() + raw_boxes = results[idx]['raw_boxes'].cpu().numpy() + # pdb.set_trace() + #breakpoint() + batch_json[video_name] = [ + { + "timestamp": segment[pid].tolist(), + "raw_box": raw_boxes[pid].tolist(), + "proposal_score": results[idx]['scores'][pid].item(), + "sentence": results[idx]['captions'][pid], + "sentence_score": results[idx]['caption_scores'][pid], + 
'query_id': results[idx]['query_id'][pid].item(), + 'vid_duration': results[idx]['vid_duration'].item(), + 'pred_event_count': results[idx]['pred_seq_len'].item(), + } + for pid in range(len(segment)) if results[idx]['scores'][pid].item() > score_threshold] + out_json['results'].update(batch_json) + if debug and len(out_json['results']) > 5: + break + + save_dvc_json(out_json, dvc_json_path) + + if skip_lang_eval: + return None, None + + # Huabin comment this line to avoid reporting losses during evaluation + # for k in loss_sum.keys(): + # loss_sum[k] = np.round(loss_sum[k] / (len(loader) + 1e-5), 3).item() + # logger.info('loss: {}'.format(loss_sum)) + scores = eval_metrics(dvc_json_path, + gt_filenames=opt.gt_file_for_eval, + para_gt_filenames=opt.gt_file_for_para_eval, + alpha=alpha, + rerank=(opt.count_loss_coef > 0), + dvc_eval_version=dvc_eval_version, + transformer_input_type=opt.transformer_input_type + ) + + out_json.update(scores) + save_dvc_json(out_json, dvc_json_path) + # return scores, loss_sum + return scores, [] diff --git a/yc2_univl/backup/misc/MIL_loss.py b/yc2_univl/backup/misc/MIL_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a234e01695ca8871b045a0ba31b13e9e79883a --- /dev/null +++ b/yc2_univl/backup/misc/MIL_loss.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmdet.models.losses import accuracy +from mmdet.models.losses.cross_entropy_loss import _expand_onehot_labels +from .utils import weight_reduce_loss + + +class MILLoss(nn.Module): + + def __init__(self, + # use_binary=True, + # reduction='mean', + binary_ins=False, + loss_weight=1.0, eps=1e-6, loss_type='gfocal_loss'): + """ + Args: + use_binary (bool, optional): Whether the prediction is + used for binary cross-entropy + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + """ + super(MILLoss, self).__init__() + # self.use_binary = use_binary + # self.reduction = reduction + self.loss_weight = loss_weight + # if self.use_sigmoid: + # self.loss_cls = CrossEntropyLoss(use_sigmoid=True, loss_weight=loss_weight) + self.eps = eps + self.loss_type = loss_type + self.binary_ins = binary_ins + + def gfocal_loss(self, p, q, w=1.0): + l1 = (p - q) ** 2 + l2 = q * (p + self.eps).log() + (1 - q) * (1 - p + self.eps).log() + return -(l1 * l2 * w).sum(dim=-1) + + def forward(self, bag_cls_prob, bag_ins_outs, labels, valid, weight=None): + """ + bag_cls_outs: (B, N, C), + bag_ins_outs: (B, N, C*2/C) + valid: (B, N, 1/C) + labels: (B, ) + Returns: + """ + if self.binary_ins: + assert bag_ins_outs.shape[-1] / bag_cls_prob.shape[-1] == 2 + else: + assert bag_ins_outs.shape[-1] == bag_cls_prob.shape[-1] + + B, N, C = bag_cls_prob.shape + prob_cls = bag_cls_prob.unsqueeze(dim=-1) # (B, N, C, 1) + prob_ins = bag_ins_outs.reshape(B, N, C, -1) # (B, N, C, 2/1) + prob_ins = prob_ins.softmax(dim=1) * valid.unsqueeze(dim=-1) + prob_ins = F.normalize(prob_ins, dim=1, p=1) + prob = (prob_cls * prob_ins).sum(dim=1) + acc = accuracy(prob[..., 0], labels) + + label_weights = (valid.sum(dim=1) > 0).float() + labels = _expand_onehot_labels(labels, None, C)[0].float() + num_sample = max(torch.sum(label_weights.sum(dim=-1) > 0).float().item(), 1.)
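        # What follows: with binary_ins, prob carries a (positive, negative)
        # pair per class; the two streams are concatenated along the batch and
        # the negative half is supervised with all-zero labels. Each bag is then
        # scored either with gfocal_loss above, i.e.
        # -(p - q)^2 * (q*log(p) + (1 - q)*log(1 - p)), or with plain BCE, and
        # finally averaged over bags that have at least one valid instance.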
+ + if prob.shape[-1] == 1: + prob = prob.squeeze(dim=-1) + elif prob.shape[-1] == 2: # with binary ins + pos_prob, neg_prob = prob[..., 0], prob[..., 1] + prob = torch.cat([pos_prob, neg_prob]) + neg_labels = labels.new_zeros(labels.shape) + labels = torch.cat([labels, neg_labels]) + label_weights = torch.cat([label_weights, label_weights]) + + if self.loss_type == 'gfocal_loss': + loss = self.gfocal_loss(prob, labels, label_weights) + if weight is not None: + # modified by fei ##############################################################3 + weight=weight.squeeze(-1) + elif self.loss_type == 'binary_cross_entropy': + # if self.use_sigmoid: + # method 1: + # loss = self.loss_cls( + # prob, + # labels, + # label_weights, + # avg_factor=avg_factor, + # reduction_override=reduction_override) + # method 2 + prob = prob.clamp(0, 1) + # modified by fei ##############################################################3 + loss = F.binary_cross_entropy(prob, labels.float(), None, reduction="none") + else: + raise ValueError() + loss = weight_reduce_loss(loss, weight, avg_factor=num_sample) * self.loss_weight + return loss, acc, num_sample \ No newline at end of file diff --git a/yc2_univl/backup/misc/__pycache__/utils.cpython-38.pyc b/yc2_univl/backup/misc/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a07b1b2f7c2819d5dcfeb1e5e462ac236b6d940 Binary files /dev/null and b/yc2_univl/backup/misc/__pycache__/utils.cpython-38.pyc differ diff --git a/yc2_univl/backup/misc/build_vocab.py b/yc2_univl/backup/misc/build_vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..181c9ff27a7edc9d57e16cba107a87857062e24b --- /dev/null +++ b/yc2_univl/backup/misc/build_vocab.py @@ -0,0 +1,66 @@ +# coding:utf-8 +import json + +# file_path_list = ["data/captiondata/train_modified.json", "data/captiondata/val_1.json", "data/captiondata/val_2.json"] +file_path_list = ["data/captiondata/yc2/yc2_train.json", "data/captiondata/yc2/yc2_val.json"] + +count_threshold = 2 # 4 for anet, 2 for youcook2 +# output_path = './data/vocabulary_activitynet.json' +output_path = './data/vocabulary_youcook2.json' + +mark = [',', ':', '!', '_', ';', '-', '.', '?', '/', '"', '\\n', '\\'] + +count_vocal = {} + +for file_path in file_path_list: + data = json.load(open(file_path)) + video_ids = data.keys() + print('video num of ' + file_path.split('/')[-1], len(video_ids)) + for video_id in video_ids: + sentences = data[video_id]["sentences"] + for sentence in sentences: + for m in mark: + if m in sentence: + sentence = sentence.replace(m, " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + + sentence = sentence.lstrip() + sentence = sentence.rstrip() + sentence = sentence.lower() + sentence = sentence.split(" ") + length = len(sentence) + + # print(sentence) + for word in sentence: + # print(type(word)) + for m in word: + if m == ' ': + print('warning !') + word = word.replace(m, '') + if word == '': + print('warning !') + pass + count_vocal[word] = count_vocal.get(word, 0) + 1 + +print("total word:", sum(count_vocal.values())) +count_vocal[''] = 1e10 +count_vocal[''] = 1e10 +vocab = [word for word, n in count_vocal.items() if n >= count_threshold] +bad_word = [word for word, n in count_vocal.items() if n < count_threshold] +bad_count = sum(count_vocal[word] for word in bad_word) + +vocab.append('UNK') +print("number of vocab:", len(vocab)) +print("number of bad word:", len(bad_word)) 
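+# Words below count_threshold are dropped from the vocabulary and counted as
+# UNKs. Note the index maps built next start at 1, leaving index 0 free
+# (presumably reserved for padding).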
+print("number of unks:", bad_count) + +itow = {i + 1: w for i, w in enumerate(vocab)} +wtoi = {w: i + 1 for i, w in enumerate(vocab)} +print(len(itow)) +print(len(wtoi)) + +json.dump({'ix_to_word': itow, + 'word_to_ix': wtoi}, open(output_path, 'w')) +print("saving vocabulary file to {}".format(output_path)) \ No newline at end of file diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6e18c06bca951f4d0ae6bc5e92a08175f68343c Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4f6c9c6fb2356fb3b50ae9390f74b21203aa9a4 Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8794fcb4c80bab0af2f4c0acf2e324518d3630a Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef902352c76a36b2fe11f4a84a4b6186c48b2831 Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/box_ops.py b/yc2_univl/backup/misc/detr_utils/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7106ba6c48a3cc3827a4bd923b08c7c61213af --- /dev/null +++ b/yc2_univl/backup/misc/detr_utils/box_ops.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Utilities for bounding box manipulation and GIoU. 
+""" +import torch +from torchvision.ops.boxes import box_area + +def box_cl_to_xy(x): + c, l = x.unbind(-1) + b = [c - 0.5 * l, c + 0.5 * l] + return torch.stack(b, dim=-1) + +def box_xy_to_cl(x): + x0, x1 = x.unbind(-1) + b = [(x0 + x1) / 2, (x1 - x0)] + return torch.stack(b, dim=-1) + +# modified from torchvision to also return the union +def box_iou(boxes1, boxes2): + area1 = boxes1[:, 1] - boxes1[:, 0] + area2 = boxes2[:, 1] - boxes2[:, 0] + lt = torch.max(boxes1[:, None, 0], boxes2[:, 0]) # [N,M,2] + rb = torch.min(boxes1[:, None, 1], boxes2[:, 1]) # [N,M,2] + inter = (rb - lt).clamp(min=0) # [N,M,2] + union = area1[:, None] + area2 - inter + iou = inter / (union + 1e-5) + return iou, union + + +def generalized_box_iou(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + + The boxes should be in [x0, y0, x1, y1] format + + Returns a [N, M] pairwise matrix, where N = len(boxes1) + and M = len(boxes2) + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 1:] >= boxes1[:, :1]).all() + assert (boxes2[:, 1:] >= boxes2[:, :1]).all() + iou, union = box_iou(boxes1, boxes2) + lt = torch.min(boxes1[:, None, 0], boxes2[:, 0]) + rb = torch.max(boxes1[:, None, 1], boxes2[:, 1]) + area = (rb - lt).clamp(min=0) # [N,M,2] + giou = iou - (area - union) / (area + 1e-5) + return giou \ No newline at end of file diff --git a/yc2_univl/backup/misc/detr_utils/misc.py b/yc2_univl/backup/misc/detr_utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..168603606353a959ca9cf6a39fbf2d7f9216e560 --- /dev/null +++ b/yc2_univl/backup/misc/detr_utils/misc.py @@ -0,0 +1,989 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +# if float(torchvision.__version__[:3]) < 0.7: +# from torchvision.ops import _new_empty_tensor +# from torchvision.ops.misc import _output_size + + +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. 
+""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.nn as nn +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +if float(torchvision.__version__[:3]) < 0.5: + import math + # from torchvision.ops.misc import _NewEmptyTensorOp + def _check_size_scale_factor(dim, size, scale_factor): + # type: (int, Optional[List[int]], Optional[float]) -> None + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if not (scale_factor is not None and len(scale_factor) != dim): + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + def _output_size(dim, input, size, scale_factor): + # type: (int, Tensor, Optional[List[int]], Optional[float]) -> List[int] + assert dim == 2 + _check_size_scale_factor(dim, size, scale_factor) + if size is not None: + return size + # if dim is not 2 or scale_factor is iterable use _ntuple instead of concat + assert scale_factor is not None and isinstance(scale_factor, (int, float)) + scale_factors = [scale_factor, scale_factor] + # math.floor might return float in py2.7 + return [ + int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + ] +elif float(torchvision.__version__[:3]) < 0.7: + from torchvision.ops import _new_empty_tensor + from torchvision.ops.misc import _output_size + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def get_sha(): + cwd = os.path.dirname(os.path.abspath(__file__)) + + def _run(command): + return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() + sha = 'N/A' + diff = "clean" + branch = 'N/A' + try: + sha = _run(['git', 'rev-parse', 'HEAD']) + subprocess.check_output(['git', 'diff'], cwd=cwd) + diff = _run(['git', 'diff-index', 'HEAD']) + diff = "has uncommited changes" if diff else "clean" + branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) + except Exception: + pass + message = f"sha: {sha}, status: {diff}, branch: {branch}" + return message + + +def collate_fn(batch): + batch = 
list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], :img.shape[2]] = False + else: + raise ValueError('not supported') + return NestedTensor(tensor, mask) + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor], duration=None): + self.tensors = tensors + self.mask = mask + self.duration = duration + + def to(self, device, non_blocking=False): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device, non_blocking=non_blocking) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device, non_blocking=non_blocking) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def record_stream(self, *args, **kwargs): + self.tensors.record_stream(*args, **kwargs) + if self.mask is not None: + self.mask.record_stream(*args, **kwargs) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def get_local_size(): + if not is_dist_avail_and_initialized(): + return 1 + return int(os.environ['LOCAL_SIZE']) + + +def get_local_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return int(os.environ['LOCAL_RANK']) + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + args.dist_url = 'env://' + os.environ['LOCAL_SIZE'] = str(torch.cuda.device_count()) + elif 'SLURM_PROCID' in os.environ: + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = 
torch.cuda.device_count() + addr = subprocess.getoutput( + 'scontrol show hostname {} | head -n1'.format(node_list)) + os.environ['MASTER_PORT'] = os.environ.get('MASTER_PORT', '29500') + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['RANK'] = str(proc_id) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['LOCAL_SIZE'] = str(num_gpus) + args.dist_url = 'env://' + args.world_size = ntasks + args.rank = proc_id + args.gpu = proc_id % num_gpus + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +@torch.no_grad() +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + if target.numel() == 0: + return [torch.zeros([], device=output.device)] + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +# def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): +# # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# """ +# Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# This will eventually be supported natively by PyTorch, and this +# class can go away. +# """ +# if float(torchvision.__version__[:3]) < 0.7: +# if input.numel() > 0: +# return torch.nn.functional.interpolate( +# input, size, scale_factor, mode, align_corners +# ) +# +# output_shape = _output_size(2, input, size, scale_factor) +# output_shape = list(input.shape[:-2]) + list(output_shape) +# if float(torchvision.__version__[:3]) < 0.5: +# return _NewEmptyTensorOp.apply(input, output_shape) +# return _new_empty_tensor(input, output_shape) +# else: +# return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) + + +def get_total_grad_norm(parameters, norm_type=2): + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + device = parameters[0].grad.device + total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), + norm_type) + return total_norm + + +def inverse_sigmoid(x, eps=1e-5): + x = x.clamp(min=0, max=1) + x1 = x.clamp(min=eps) + x2 = (1 - x).clamp(min=eps) + return torch.log(x1/x2) + + + +# class SmoothedValue(object): +# """Track a series of values and provide access to smoothed values over a +# window or the global series average. +# """ +# +# def __init__(self, window_size=20, fmt=None): +# if fmt is None: +# fmt = "{median:.4f} ({global_avg:.4f})" +# self.deque = deque(maxlen=window_size) +# self.total = 0.0 +# self.count = 0 +# self.fmt = fmt +# +# def update(self, value, n=1): +# self.deque.append(value) +# self.count += n +# self.total += value * n +# +# def synchronize_between_processes(self): +# """ +# Warning: does not synchronize the deque! 
+# """ +# if not is_dist_avail_and_initialized(): +# return +# t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') +# dist.barrier() +# dist.all_reduce(t) +# t = t.tolist() +# self.count = int(t[0]) +# self.total = t[1] +# +# @property +# def median(self): +# d = torch.tensor(list(self.deque)) +# return d.median().item() +# +# @property +# def avg(self): +# d = torch.tensor(list(self.deque), dtype=torch.float32) +# return d.mean().item() +# +# @property +# def global_avg(self): +# return self.total / self.count +# +# @property +# def max(self): +# return max(self.deque) +# +# @property +# def value(self): +# return self.deque[-1] +# +# def __str__(self): +# return self.fmt.format( +# median=self.median, +# avg=self.avg, +# global_avg=self.global_avg, +# max=self.max, +# value=self.value) +# +# +# def all_gather(data): +# """ +# Run all_gather on arbitrary picklable data (not necessarily tensors) +# Args: +# data: any picklable object +# Returns: +# list[data]: list of data gathered from each rank +# """ +# world_size = get_world_size() +# if world_size == 1: +# return [data] +# +# # serialized to a Tensor +# buffer = pickle.dumps(data) +# storage = torch.ByteStorage.from_buffer(buffer) +# tensor = torch.ByteTensor(storage).to("cuda") +# +# # obtain Tensor size of each rank +# local_size = torch.tensor([tensor.numel()], device="cuda") +# size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] +# dist.all_gather(size_list, local_size) +# size_list = [int(size.item()) for size in size_list] +# max_size = max(size_list) +# +# # receiving Tensor from all ranks +# # we pad the tensor because torch all_gather does not support +# # gathering tensors of different shapes +# tensor_list = [] +# for _ in size_list: +# tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) +# if local_size != max_size: +# padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") +# tensor = torch.cat((tensor, padding), dim=0) +# dist.all_gather(tensor_list, tensor) +# +# data_list = [] +# for size, tensor in zip(size_list, tensor_list): +# buffer = tensor.cpu().numpy().tobytes()[:size] +# data_list.append(pickle.loads(buffer)) +# +# return data_list +# +# +# def reduce_dict(input_dict, average=True): +# """ +# Args: +# input_dict (dict): all the values will be reduced +# average (bool): whether to do average or sum +# Reduce the values in the dictionary from all processes so that all processes +# have the averaged results. Returns a dict with the same fields as +# input_dict, after reduction. 
+# """ +# world_size = get_world_size() +# if world_size < 2: +# return input_dict +# with torch.no_grad(): +# names = [] +# values = [] +# # sort the keys so that they are consistent across processes +# for k in sorted(input_dict.keys()): +# names.append(k) +# values.append(input_dict[k]) +# values = torch.stack(values, dim=0) +# dist.all_reduce(values) +# if average: +# values /= world_size +# reduced_dict = {k: v for k, v in zip(names, values)} +# return reduced_dict +# +# +# class MetricLogger(object): +# def __init__(self, delimiter="\t"): +# self.meters = defaultdict(SmoothedValue) +# self.delimiter = delimiter +# +# def update(self, **kwargs): +# for k, v in kwargs.items(): +# if isinstance(v, torch.Tensor): +# v = v.item() +# assert isinstance(v, (float, int)) +# self.meters[k].update(v) +# +# def __getattr__(self, attr): +# if attr in self.meters: +# return self.meters[attr] +# if attr in self.__dict__: +# return self.__dict__[attr] +# raise AttributeError("'{}' object has no attribute '{}'".format( +# type(self).__name__, attr)) +# +# def __str__(self): +# loss_str = [] +# for name, meter in self.meters.items(): +# loss_str.append( +# "{}: {}".format(name, str(meter)) +# ) +# return self.delimiter.join(loss_str) +# +# def synchronize_between_processes(self): +# for meter in self.meters.values(): +# meter.synchronize_between_processes() +# +# def add_meter(self, name, meter): +# self.meters[name] = meter +# +# def log_every(self, iterable, print_freq, header=None): +# i = 0 +# if not header: +# header = '' +# start_time = time.time() +# end = time.time() +# iter_time = SmoothedValue(fmt='{avg:.4f}') +# data_time = SmoothedValue(fmt='{avg:.4f}') +# space_fmt = ':' + str(len(str(len(iterable)))) + 'd' +# if torch.cuda.is_available(): +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}', +# 'max mem: {memory:.0f}' +# ]) +# else: +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}' +# ]) +# MB = 1024.0 * 1024.0 +# for obj in iterable: +# data_time.update(time.time() - end) +# yield obj +# iter_time.update(time.time() - end) +# if i % print_freq == 0 or i == len(iterable) - 1: +# eta_seconds = iter_time.global_avg * (len(iterable) - i) +# eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) +# if torch.cuda.is_available(): +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time), +# memory=torch.cuda.max_memory_allocated() / MB)) +# else: +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time))) +# i += 1 +# end = time.time() +# total_time = time.time() - start_time +# total_time_str = str(datetime.timedelta(seconds=int(total_time))) +# print('{} Total time: {} ({:.4f} s / it)'.format( +# header, total_time_str, total_time / len(iterable))) +# +# +# def get_sha(): +# cwd = os.path.dirname(os.path.abspath(__file__)) +# +# def _run(command): +# return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() +# sha = 'N/A' +# diff = "clean" +# branch = 'N/A' +# try: +# sha = _run(['git', 'rev-parse', 'HEAD']) +# subprocess.check_output(['git', 'diff'], cwd=cwd) +# diff = _run(['git', 'diff-index', 'HEAD']) +# diff = "has uncommited changes" if diff else "clean" +# branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) +# except Exception: 
+# pass +# message = f"sha: {sha}, status: {diff}, branch: {branch}" +# return message +# +# # +# # def collate_fn(batch): +# # batch = list(zip(*batch)) +# # batch[0] = nested_tensor_from_tensor_list(batch[0]) +# # return tuple(batch) +# +# +# def _max_by_axis(the_list): +# # type: (List[List[int]]) -> List[int] +# maxes = the_list[0] +# for sublist in the_list[1:]: +# for index, item in enumerate(sublist): +# maxes[index] = max(maxes[index], item) +# return maxes +# +# +# class NestedTensor(object): +# def __init__(self, tensors, mask: Optional[Tensor]): +# self.tensors = tensors +# self.mask = mask +# +# def to(self, device): +# # type: (Device) -> NestedTensor # noqa +# cast_tensor = self.tensors.to(device) +# mask = self.mask +# if mask is not None: +# assert mask is not None +# cast_mask = mask.to(device) +# else: +# cast_mask = None +# return NestedTensor(cast_tensor, cast_mask) +# +# def decompose(self): +# return self.tensors, self.mask +# +# def __repr__(self): +# return str(self.tensors) +# +# # +# # def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): +# # # TODO make this more general +# # if tensor_list[0].ndim == 3: +# # if torchvision._is_tracing(): +# # # nested_tensor_from_tensor_list() does not export well to ONNX +# # # call _onnx_nested_tensor_from_tensor_list() instead +# # return _onnx_nested_tensor_from_tensor_list(tensor_list) +# # +# # # TODO make it support different-sized images +# # max_size = _max_by_axis([list(img.shape) for img in tensor_list]) +# # # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) +# # batch_shape = [len(tensor_list)] + max_size +# # b, c, h, w = batch_shape +# # dtype = tensor_list[0].dtype +# # device = tensor_list[0].device +# # tensor = torch.zeros(batch_shape, dtype=dtype, device=device) +# # mask = torch.ones((b, h, w), dtype=torch.bool, device=device) +# # for img, pad_img, m in zip(tensor_list, tensor, mask): +# # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # m[: img.shape[1], :img.shape[2]] = False +# # else: +# # raise ValueError('not supported') +# # return NestedTensor(tensor, mask) +# +# +# # _onnx_nested_tensor_from_tensor_list() is an implementation of +# # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
+# # @torch.jit.unused +# # def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: +# # max_size = [] +# # for i in range(tensor_list[0].dim()): +# # max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) +# # max_size.append(max_size_i) +# # max_size = tuple(max_size) +# # +# # # work around for +# # # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # # m[: img.shape[1], :img.shape[2]] = False +# # # which is not yet supported in onnx +# # padded_imgs = [] +# # padded_masks = [] +# # for img in tensor_list: +# # padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] +# # padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) +# # padded_imgs.append(padded_img) +# # +# # m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) +# # padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) +# # padded_masks.append(padded_mask.to(torch.bool)) +# # +# # tensor = torch.stack(padded_imgs) +# # mask = torch.stack(padded_masks) +# # +# # return NestedTensor(tensor, mask=mask) +# +# +# def setup_for_distributed(is_master): +# """ +# This function disables printing when not in master process +# """ +# import builtins as __builtin__ +# builtin_print = __builtin__.print +# +# def print(*args, **kwargs): +# force = kwargs.pop('force', False) +# if is_master or force: +# builtin_print(*args, **kwargs) +# +# __builtin__.print = print +# +# +# def is_dist_avail_and_initialized(): +# if not dist.is_available(): +# return False +# if not dist.is_initialized(): +# return False +# return True +# +# +# def get_world_size(): +# if not is_dist_avail_and_initialized(): +# return 1 +# return dist.get_world_size() +# +# +# def get_rank(): +# if not is_dist_avail_and_initialized(): +# return 0 +# return dist.get_rank() +# +# +# def is_main_process(): +# return get_rank() == 0 +# +# +# def save_on_master(*args, **kwargs): +# if is_main_process(): +# torch.save(*args, **kwargs) +# +# +# def init_distributed_mode(args): +# if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: +# args.rank = int(os.environ["RANK"]) +# args.world_size = int(os.environ['WORLD_SIZE']) +# args.gpu = int(os.environ['LOCAL_RANK']) +# elif 'SLURM_PROCID' in os.environ: +# args.rank = int(os.environ['SLURM_PROCID']) +# args.gpu = args.rank % torch.cuda.device_count() +# else: +# print('Not using distributed mode') +# args.distributed = False +# return +# +# args.distributed = True +# +# torch.cuda.set_device(args.gpu) +# args.dist_backend = 'nccl' +# print('| distributed init (rank {}): {}'.format( +# args.rank, args.dist_url), flush=True) +# torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, +# world_size=args.world_size, rank=args.rank) +# torch.distributed.barrier() +# setup_for_distributed(args.rank == 0) +# +# +# @torch.no_grad() +# def accuracy(output, target, topk=(1,)): +# """Computes the precision@k for the specified values of k""" +# if target.numel() == 0: +# return [torch.zeros([], device=output.device)] +# maxk = max(topk) +# batch_size = target.size(0) +# +# _, pred = output.topk(maxk, 1, True, True) +# pred = pred.t() +# correct = pred.eq(target.view(1, -1).expand_as(pred)) +# +# res = [] +# for k in topk: +# correct_k = correct[:k].view(-1).float().sum(0) +# res.append(correct_k.mul_(100.0 / batch_size)) +# return res +# +# +# # def interpolate(input, size=None, scale_factor=None, mode="nearest", 
align_corners=None): +# # # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# # """ +# # Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# # This will eventually be supported natively by PyTorch, and this +# # class can go away. +# # """ +# # if float(torchvision.__version__[:3]) < 0.7: +# # if input.numel() > 0: +# # return torch.nn.functional.interpolate( +# # input, size, scale_factor, mode, align_corners +# # ) +# # +# # output_shape = _output_size(2, input, size, scale_factor) +# # output_shape = list(input.shape[:-2]) + list(output_shape) +# # return _new_empty_tensor(input, output_shape) +# # else: +# # return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) diff --git a/yc2_univl/backup/misc/utils.py b/yc2_univl/backup/misc/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..301a498189d0568ce14362b3630f2c89c2a26c6e --- /dev/null +++ b/yc2_univl/backup/misc/utils.py @@ -0,0 +1,357 @@ +# coding:utf-8 +# from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import torch +import numpy as np +import glob +import shutil +import os +import colorlog +import random +import six +from six.moves import cPickle +import matplotlib as mpl + +mpl.use('Agg') +import matplotlib.pyplot as plt + + +def match_name_keywords(n, name_keywords): + out = False + for b in name_keywords: + if b in n: + out = True + break + return out + + +def decide_two_stage(transformer_input_type, dt, criterion): + if transformer_input_type == 'gt_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = dt['gt_boxes_mask'] + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = True + elif transformer_input_type == 'prior_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = None + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = False + elif transformer_input_type == 'queries': # + two_stage = False + proposals = None + proposals_mask = None + disable_iterative_refine = False + else: + raise ValueError('Wrong value of transformer_input_type, got {}'.format(transformer_input_type)) + return two_stage, disable_iterative_refine, proposals, proposals_mask + + +def pickle_load(f): + """ Load a pickle. + Parameters + ---------- + f: file-like object + """ + if six.PY3: + return cPickle.load(f, encoding='latin-1') + else: + return cPickle.load(f) + + +def pickle_dump(obj, f): + """ Dump a pickle. + Parameters + ---------- + obj: pickled object + f: file-like object + """ + if six.PY3: + return cPickle.dump(obj, f, protocol=2) + else: + return cPickle.dump(obj, f) + + +def set_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # grid_sampler_2d_backward_cuda does not have a deterministic implementation. 
try set torch.use_deterministic_algorithms(True, warn_only=True) to see the non-deterministic operation + # torch.use_deterministic_algorithms(True, warn_only=True) + + +def update_values(dict_from, dict_to): + for key, value in dict_from.items(): + if key not in dict_to.keys(): + raise AssertionError('key mismatching: {}'.format(key)) + if isinstance(value, dict): + update_values(dict_from[key], dict_to[key]) + elif value is not None: + dict_to[key] = dict_from[key] + + +def print_opt(opt, model, logger): + print_alert_message('All args:', logger) + for key, item in opt._get_kwargs(): + logger.info('{} = {}'.format(key, item)) + print_alert_message('Model structure:', logger) + logger.info(model) + + +def build_folder_name(opt): + # The dataset + # breakpoint() + if len(opt.visual_feature_folder) == 2: + if ('youcook2' in opt.visual_feature_folder[1]) or ('yc2' in opt.visual_feature_folder[1]): + dataset_name = 'howto-yc2_yc2' + elif ('Tasty' in opt.visual_feature_folder[1]) or ('tasty' in opt.visual_feature_folder[1]): + dataset_name = 'howto-tasty_tasty' + elif ('anet' in opt.visual_feature_folder[1]) or ('Anet' in opt.visual_feature_folder[1]): + dataset_name = 'howto-anet_anet' + # elif ('vlep' in opt.visual_feature_folder[1]) or ('Vlep' in opt.visual_feature_folder[1]): + # dataset_name = 'howto-vlep_vlep' + else: + raise ValueError('Wrong dataset name') + + if 'vlep' in opt.visual_feature_folder[0] or 'Vlep' in opt.visual_feature_folder[0]: + dataset_name = dataset_name.replace('howto', 'vlep') + else: + if ('youcook2' in opt.visual_feature_folder[0]) or ('yc2' in opt.visual_feature_folder[0]): + dataset_name = 'yc2' + elif ('Anet' in opt.visual_feature_folder[0]) or ('anet' in opt.visual_feature_folder[0]): + dataset_name = 'anet' + elif ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + dataset_name = 'tasty' + elif ('Howto' in opt.visual_feature_folder[0]) or ('howto' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'howto_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_anet' + elif ('vlep' in opt.visual_feature_folder[0]) or ('Vlep' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'vlep_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_anet' + else: + raise ValueError('Wrong dataset name') + if 'tasty_14' in opt.dict_file: + dataset_name += '_voc14' + + # The code base + if opt.use_anchor: + use_anchor = 'anc' # Means learnable anchor is used + else: + use_anchor = 'ori' # Means original anchor in pdvc is used + + # The state of using pseudo boxes + if opt.use_pseudo_box: + use_pseudo = 'pbox' + if opt.pseudo_box_type == 'similarity': + use_pseudo += '(sim)' + else: + use_pseudo += '({})'.format(opt.pseudo_box_type) + else: + use_pseudo = 'GT' + + # The viusal-text model used + if opt.pretrained_language_model == 'CLIP-ViP': + text_model = 'ViP' + elif opt.pretrained_language_model == 'UniVL': + text_model = 'Uni' + else: + 
text_model = opt.pretrained_language_model + + format_folder_name = '_'.join([dataset_name, use_anchor, use_pseudo, text_model]) + + + + return format_folder_name + +def build_folder(opt): + # breakpoint() + if opt.start_from: + print('Start training from id:{}'.format(opt.start_from)) + save_folder = os.path.join(opt.save_dir, opt.start_from) + assert os.path.exists(save_folder) and os.path.isdir(save_folder), 'Wrong start_from path: {}'.format(save_folder) + else: + if not os.path.exists(opt.save_dir): + os.mkdir(opt.save_dir) + format_folder_name = build_folder_name(opt) + # breakpoint() + save_foldername = '' + if opt.use_pseudo_box: + if opt.pseudo_box_type != 'align': + if opt.pseudo_box_type == 'similarity_op' or opt.pseudo_box_type == 'similarity_op_order': + save_foldername = '{}_topf{}_beta{}_iter{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.beta, opt.iteration, opt.width_ratio) + elif opt.pseudo_box_type == 'similarity_op_order_v2': + save_foldername = '{}_topf{}_iter{}_r{}_th{}'.format(opt.pseudo_box_type, opt.top_frames, opt.iteration, opt.width_ratio, opt.width_th) + else: + save_foldername = '{}_topf{}_w{}_{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.window_size, opt.statistic_mode, opt.width_ratio) + else: + save_folder = 'align' + else: + save_foldername = 'gtbox' + + if opt.refine_pseudo_box: + save_foldername += '_refine_aug({},{})_top{}_{}stage'.format(opt.pseudo_box_aug_num, \ + opt.pseudo_box_aug_ratio, \ + opt.merge_k_boxes, \ + opt.refine_pseudo_stage_num) + if opt.pseudo_box_aug_mode == 'uniform': + save_foldername += '_uniform' + elif opt.pseudo_box_aug_mode == 'random_new': + save_foldername += '_random_new' + save_foldername += ('_' + opt.merge_criterion) + if opt.merge_mode == 'interpolate': + save_foldername += '_interpolate' + if opt.use_neg_pseudo_box: + save_foldername += '_{}neg'.format(opt.num_neg_box) + if opt.mil_loss_coef != 1.0: + save_foldername += '_mil_coef{}'.format(str(opt.mil_loss_coef)) + if opt.weighted_mil_loss: + save_foldername += '_wMIL' + if not opt.focal_mil: + save_foldername += '_noFocal' + if opt.disable_rematch: + save_foldername += '_nomatch' + if opt.use_additional_score_layer: + save_foldername += '_S-layer' + if opt.use_additional_cap_layer: + save_foldername += '_C-layer' + if 'puyu' in opt.train_caption_file[0]: + save_foldername += '_puyu' + elif 'mixlm' in opt.train_caption_file[0]: + save_foldername += '_mixlm' + + if opt.id != '': + save_foldername += '_{}'.format(opt.id) + # breakpoint() + # basefilename = os.path.basename(opt.cfg_path) + # basefilename = os.path.splitext(basefilename)[0] + save_folder = os.path.join(opt.save_dir, format_folder_name) + save_folder = os.path.join(save_folder, save_foldername) + if os.path.exists(save_folder): + print('Results folder "{}" already exists, renaming it...'.format(save_folder)) + i = 1 + while 1: + new_save_folder = save_folder + '_{}'.format(i) + if not os.path.exists(new_save_folder): + save_folder = new_save_folder + break + i += 1 + # wait_flag = input('Warning! Path {} already exists, rename it? (Y/N) : '.format(save_folder)) + # if wait_flag in ['Y', 'y']: + # # opt.id = opt.id + '_{}'.format(time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) + # # save_folder = os.path.join(opt.save_dir, opt.id) + # # print('Rename opt.id as "{}".'.format(opt.id)) + # new_name = input('the new name to be appended :') + # save_folder = save_folder + '_' + new_name + # # elif wait_flag in ['N', 'n']: + # # wait_flag_new = input('Are you sure re-write this folder:{}? 
(Y/N): '.format(save_folder)) + # # if wait_flag_new in ['Y', 'y']: + # # return save_folder + # # else: + # # raise AssertionError('Folder {} already exists'.format(save_folder)) + # else: + # raise AssertionError('Folder {} already exists'.format(save_folder)) + print('Results folder "{}" does not exist, creating folder...'.format(save_folder)) + os.makedirs(save_folder) + os.makedirs(os.path.join(save_folder, 'prediction')) + return save_folder + + +def backup_envir(save_folder, opt): + cfg_path = opt.cfg_path + dir_path = os.path.dirname(cfg_path) + backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc'] + if dir_path not in backup_folders: + backup_folders.append(dir_path) + + backup_files = glob.glob('./*.py') + for folder in backup_folders: + shutil.copytree(folder, os.path.join(save_folder, 'backup', folder)) + for file in backup_files: + shutil.copyfile(file, os.path.join(save_folder, 'backup', file)) + + +def create_logger(folder, filename): + log_colors = { + 'DEBUG': 'blue', + 'INFO': 'white', + 'WARNING': 'green', + 'ERROR': 'red', + 'CRITICAL': 'yellow', + } + + import logging + logger = logging.getLogger('DVC') + # %(filename)s$RESET:%(lineno)d + # LOGFORMAT = "%(log_color)s%(asctime)s [%(log_color)s%(filename)s:%(lineno)d] | %(log_color)s%(message)s%(reset)s |" + LOGFORMAT = "" + LOG_LEVEL = logging.DEBUG + logging.root.setLevel(LOG_LEVEL) + stream = logging.StreamHandler() + stream.setLevel(LOG_LEVEL) + stream.setFormatter(colorlog.ColoredFormatter(LOGFORMAT, datefmt='%d %H:%M', log_colors=log_colors)) + + # print to log file + hdlr = logging.FileHandler(os.path.join(folder, filename)) + hdlr.setLevel(LOG_LEVEL) + # hdlr.setFormatter(logging.Formatter("[%(asctime)s] %(message)s")) + hdlr.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(hdlr) + logger.addHandler(stream) + return logger + + +def print_alert_message(str, logger=None): + msg = '*' * 20 + ' ' + str + ' ' + '*' * (58 - len(str)) + if logger: + logger.info('\n\n' + msg) + else: + print(msg) + + +def set_lr(optimizer, lr): + for group in optimizer.param_groups: + group['lr'] = lr + + +def clip_gradient(optimizer, grad_clip): + for group in optimizer.param_groups: + for i, param in enumerate(group['params']): + if param.grad is not None: + param.grad.data.clamp_(-grad_clip, grad_clip) + + +if __name__ == '__main__': + # import opts + # + # info = {'opt': vars(opts.parse_opts()), + # 'loss': {'tap_loss': 0, 'tap_reg_loss': 0, 'tap_conf_loss': 0, 'lm_loss': 0}} + # record_this_run_to_csv(info, 'save/results_all_runs.csv') + + logger = create_logger('./', 'mylogger.log') + logger.info('debug') + logger.info('test2') diff --git a/yc2_univl/backup/opts.py b/yc2_univl/backup/opts.py new file mode 100644 index 0000000000000000000000000000000000000000..8c0abaea05f6aefca9779237b1d3c555f10e45ec --- /dev/null +++ b/yc2_univl/backup/opts.py @@ -0,0 +1,312 @@ +import argparse +import time +import yaml +import os +import numpy as np + +def parse_opts(): + parser = argparse.ArgumentParser() + + # configure of this run + parser.add_argument('--cfg_path', type=str, required=True, help='config file') + parser.add_argument('--id', type=str, default='', help='id of this run. 
Results and logs will saved in this folder ./save/id') + parser.add_argument('--gpu_id', type=str, nargs='+', default=[]) + parser.add_argument('--disable_tqdm', action='store_true') + parser.add_argument('--seed', type=int, default=777) + parser.add_argument('--random_seed', action='store_true', help='choose a random seed from {1,...,1000}') + parser.add_argument('--disable_cudnn', type=int, default=0, help='disable cudnn may solve some unknown bugs') + parser.add_argument('--debug', action='store_true', help='using mini-dataset for fast debugging') + parser.add_argument('--device', default='cuda', choices=['cpu', 'cuda'], help='device to use for training / testing') + parser.add_argument('--map', action='store_true', default=False, help='map a100 data path to 3090 data path') + # parser.add_argument('--extra_id', type=str, default='', help='extra config for listed in the folder name') + + # ***************************** INPUT DATA PATH ***************************** + parser.add_argument('--train_caption_file', type=str, + default='data/anet/captiondata/train_modified.json', help='') + parser.add_argument('--invalid_video_json', type=str, nargs='+', default=[]) + parser.add_argument('--val_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--visual_feature_folder', type=str, default='data/anet/resnet_bn') + parser.add_argument('--text_feature_folder', type=str, default=None) + parser.add_argument('--gt_file_for_auc', type=str, nargs='+', default='data/anet/captiondata/val_all.json') + parser.add_argument('--gt_file_for_eval', type=str, nargs='+', default=['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json']) + parser.add_argument('--gt_file_for_para_eval', type=str, nargs='+', default= ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json']) + parser.add_argument('--dict_file', type=str, default='data/anet/vocabulary_activitynet.json', help='') + parser.add_argument('--criteria_for_best_ckpt', type=str, default='overall', choices=['dvc', 'pc', 'overall'], help='for dense video captioning, use soda_c + METEOR as the criteria' + 'for paragraph captioning, choose the best para_METEOR+para_CIDEr+para_BLEU4' + 'for overall, select BLEU4 + METEOR + F1_score') + + parser.add_argument('--visual_feature_type', type=str, default='c3d', choices=['c3d', 'resnet_bn', 'resnet', 'UniVL', 'CLIP', 'CLIP-ViP']) + parser.add_argument('--feature_dim', type=int, default=500, help='dim of frame-level feature vector') + + parser.add_argument('--start_from', type=str, default='', help='id of the run with incompleted training') + parser.add_argument('--start_from_mode', type=str, choices=['best', 'last'], default="last") + parser.add_argument('--pretrain', type=str, choices=['full', 'encoder', 'decoder']) + parser.add_argument('--pretrain_path', type=str, default='', help='path of .pth') + + # ***************************** DATALOADER OPTION ***************************** + parser.add_argument('--nthreads', type=int, default=4) + parser.add_argument('--data_norm', type=int, default=0) + parser.add_argument('--data_rescale', type=int, default=1) + + parser.add_argument('--feature_sample_rate', type=int, default=1) + parser.add_argument('--train_proposal_sample_num', type=int, + default=24, + help='number of sampled proposals (or proposal sequence), a bigger value may be better') + parser.add_argument('--gt_proposal_sample_num', type=int, default=30) + parser.add_argument('--ft_gt_percent', 
type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + parser.add_argument('--pre_percent', type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + + + # ***************************** Caption Decoder ***************************** + parser.add_argument('--vocab_size', type=int, default=5747) + parser.add_argument('--wordRNN_input_feats_type', type=str, default='C', choices=['C', 'E', 'C+E'], + help='C:clip-level features, E: event-level features, C+E: both') + parser.add_argument('--caption_decoder_type', type=str, default="light", + choices=['none','light', 'standard']) + parser.add_argument('--rnn_size', type=int, default=512, + help='size of the rnn in number of hidden nodes in each layer') + parser.add_argument('--num_layers', type=int, default=1, help='number of layers in the RNN') + parser.add_argument('--input_encoding_size', type=int, default=512, + help='the encoding size of each token in the vocabulary') + parser.add_argument('--att_hid_size', type=int, default=512, help='the hidden size of the attention MLP') + parser.add_argument('--drop_prob', type=float, default=0.5, help='strength of dropout in the Language Model RNN') + parser.add_argument('--max_caption_len', type=int, default=30, help='') + + # ***************************** Transformer ***************************** + parser.add_argument('--hidden_dim', type=int, default=512) + parser.add_argument('--num_queries', type=int, default=100) + parser.add_argument('--hidden_dropout_prob', type=float, default=0.5) + parser.add_argument('--layer_norm_eps', type=float, default=1e-12) + parser.add_argument('--caption_cost_type', type=str, default='loss') + parser.add_argument('--set_cost_caption', type=float, default=0) + parser.add_argument('--set_cost_class', type=float, default=1) + parser.add_argument('--set_cost_bbox', type=float, default=5) + parser.add_argument('--set_cost_giou', type=float, default=2) + parser.add_argument('--cost_alpha', type=float, default=0.25) + parser.add_argument('--cost_gamma', type=float, default=2) + + parser.add_argument('--bbox_loss_coef', default=5, type=float) + parser.add_argument('--giou_loss_coef', default=2, type=float) + parser.add_argument('--count_loss_coef', default=0, type=float) + parser.add_argument('--caption_loss_coef', default=0, type=float) + parser.add_argument('--eos_coef', default=0.1, type=float, + help="Relative classification weight of the no-object class") + parser.add_argument('--num_classes', type=int, default=1) + parser.add_argument('--dec_layers', type=int, default=6) + parser.add_argument('--enc_layers', type=int, default=6) + parser.add_argument('--transformer_ff_dim', type=int, default=2048) + parser.add_argument('--transformer_dropout_prob', type=float, default=0.1) + parser.add_argument('--frame_embedding_num', type=int, default = 100) + parser.add_argument('--sample_method', type=str, default = 'nearest', choices=['nearest', 'linear']) + parser.add_argument('--fix_xcw', type=int, default=0) + + # ***************************** Learnable anchor ***************************** + parser.add_argument('--use_anchor', default=False, action='store_true') + parser.add_argument('--random_anchor_init', default=True, action='store_false') + parser.add_argument('--prior_anchor_duration_init', default=True, action='store_false') + + # ***************************** Text-query alignment ***************************** + 
+    parser.add_argument('--matcher_type', type=str, default='default', choices=['default', 'DTW', 'Sim'])
+    # === For Text encoder ===
+    parser.add_argument('--pretrained_language_model', type=str, default='UniVL', \
+                        choices=['UniVL', 'CLIP', 'CLIP-ViP'], help='Pretrained Hugging Face model')
+    parser.add_argument('--text_hidden_dim', type=int, default=768, help='hidden dim of text encoder')
+    parser.add_argument('--max_text_input_len', type=int, default=32, help='')
+    parser.add_argument('--max_pos_num', type=int, default=500)
+    parser.add_argument('--huggingface_cache_dir', type=str, default='.cache')
+    parser.add_argument('--text_encoder_learning_strategy', type=str, default='frozen', choices=('frozen',))
+
+    # === For generate_pseudo_bbox ===
+    parser.add_argument('--use_pseudo_box', default=False, action='store_true')
+    parser.add_argument('--pseudo_box_type', type=str, default='similarity', choices=['align', 'similarity', 'weight_sim', 'weight_index', 'modeframe'])
+
+    # 1) For different ways of generating pseudo box
+    parser.add_argument('--top_frames', type=int, default=15)
+    parser.add_argument('--window_size', type=int, default=2)
+    parser.add_argument('--statistic_mode', type=str, default='median', choices=['mode', 'median'])
+    parser.add_argument('--width_ratio', type=float, default=-1)
+    parser.add_argument('--beta', type=float, default=1, help="weight for overlap loss")
+    parser.add_argument('--width_th', type=float, default=0.5, help="threshold for width")
+    parser.add_argument('--iteration', type=int, default=3, help="iteration for pseudo box generation")
+    # 2) For box refinement
+    parser.add_argument('--pseudo_box_aug', default=False, action='store_true')
+    parser.add_argument('--pseudo_box_aug_num', type=int, default=5)
+    parser.add_argument('--pseudo_box_aug_ratio', type=float, default=0.1)
+    parser.add_argument('--pseudo_box_aug_mode', default='random', choices=['random', 'uniform'])
+    parser.add_argument('--refine_pseudo_box', default=False, action='store_true')
+    parser.add_argument('--use_additional_score_layer', default=False, action='store_true')
+    parser.add_argument('--use_additional_cap_layer', default=False, action='store_true')
+    parser.add_argument('--merge_k_boxes', type=int, default=3)
+    parser.add_argument('--merge_criterion', type=str, choices=['cap_topk', 'ins_topk', 'ins_cap_topk'], default='cap_topk')
+    parser.add_argument('--merge_mode', type=str, choices=['weighted_sum', 'interpolate'], default='weighted_sum')
+    parser.add_argument('--refine_pseudo_stage_num', type=int, default=2)
+    parser.add_argument('--use_query_box_for_refine', default=False, action='store_true')
+    parser.add_argument('--norm_ins_score', default='sigmoid', choices=['sigmoid', 'softmax'])
+    parser.add_argument('--cap_prob_clip', default=False, action='store_true')
+    parser.add_argument('--use_neg_pseudo_box', default=False, action='store_true')
+    parser.add_argument('--num_neg_box', default=10, type=int)
+    parser.add_argument('--weighted_mil_loss', default=False, action='store_true')
+    parser.add_argument('--focal_mil', default=False, action='store_true')
+    parser.add_argument('--disable_rematch', default=False, action='store_true')
+    parser.add_argument('--start_refine_epoch', default=-1, type=int)
+
+
+    # === For DTW ===
+    parser.add_argument('--align_keep_percentile', type=float, default=0.1)
+    parser.add_argument('--align_top_band_size', type=int, default=0)
+    parser.add_argument('--align_drop_z', type=int, default=0)
+    parser.add_argument('--align_one_to_many', default=False,
+    parser.add_argument('--align_many_to_one', default=False, action='store_true')
+    parser.add_argument('--align_contiguous', default=False, action='store_true')
+
+    # === For Sim matcher ===
+    parser.add_argument('--set_cost_sim', type=float, default=1.0)
+
+    # === For contrastive ===
+    parser.add_argument('--enable_contrastive', default=False, action='store_true', help='enable contrastive learning')
+    parser.add_argument('--disable_contrastive_projection', default=False, action='store_true', help='disable contrastive projection layers')
+    parser.add_argument('--contrastive_hidden_size', type=int, default=128, help='Contrastive hidden size')
+    parser.add_argument('--contrastive_loss_start_coef', type=float, default=0.1, help='Weight of contrastive loss')
+    parser.add_argument('--contrastive_loss_temperature', type=float, default=0.1, help='Temperature of the contrastive loss')
+    # note: argparse's type=bool is truthy for any non-empty string, so only an empty
+    # string on the command line yields False here
+    parser.add_argument('--enable_cross_video_cl', type=bool, default=True, help='Enable cross video contrastive loss')
+    # note: with action='store_true' and default=True, the two flags below cannot be
+    # disabled from the command line, only via a config file
+    parser.add_argument('--enable_e2t_cl', default=True, action='store_true', help='enable event-to-text contrastive loss')
+    parser.add_argument('--enable_bg_for_cl', default=True, action='store_true', help='add a class for background events')
+    parser.add_argument('--set_cost_cl', type=float, default=0.0)
+    parser.add_argument('--cl_schedule_val', type=float, nargs='+', default=[0, 0.1])
+    parser.add_argument('--cl_schedule_time', type=int, nargs='+', default=[0, 2])
+
+
+
+    # ***************************** Prior *****************************
+    parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all'])
+
+    # ***************************** OPTIMIZER *****************************
+    parser.add_argument('--training_scheme', type=str, default='all', choices=['cap_head_only', 'no_cap_head', 'all'])
+    parser.add_argument('--epoch', type=int, default=25)
+    parser.add_argument('--batch_size', type=int, default=1, help='batch size for training')
+    parser.add_argument('--batch_size_for_eval', type=int, default=1, help='batch size for evaluation')
+    parser.add_argument('--grad_clip', type=float, default=100., help='clip gradients at this value')
+    parser.add_argument('--optimizer_type', type=str, default='adam')
+    parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay')
+
+    parser.add_argument('--lr', type=float, default=1e-4, help='1e-4 for resnet feature and 5e-5 for C3D feature')
+    parser.add_argument('--learning_rate_decay_start', type=float, default=8)
+    parser.add_argument('--learning_rate_decay_every', type=float, default=3)
+    parser.add_argument('--learning_rate_decay_rate', type=float, default=0.5)
+
+    # ***************************** SAVING AND LOGGING *****************************
+    parser.add_argument('--min_epoch_when_save', type=int, default=-1)
+    parser.add_argument('--save_checkpoint_every', type=int, default=1)
+    parser.add_argument('--save_all_checkpoint', action='store_true')
+    parser.add_argument('--save_dir', type=str, default='/mnt/data/pjlab-3090-sport/wuhao/logs/dibs', help='directory to store checkpointed models')
+
+    # ***************************** For Deformable DETR *************************************
+    parser.add_argument('--lr_backbone_names', default=["None"], type=str, nargs='+')
+    parser.add_argument('--lr_backbone', default=2e-5, type=float)
+    parser.add_argument('--lr_proj', default=0, type=int)
+    parser.add_argument('--lr_linear_proj_names', default=['reference_points', 'sampling_offsets'], type=str, nargs='+')
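+    # Illustrative sketch (an assumption -- the training script is not part of this
+    # diff): --lr_linear_proj_names / --lr_linear_proj_mult are typically consumed
+    # when building optimizer parameter groups, roughly as in Deformable DETR:
+    #
+    #   def match_name_keywords(name, keywords):
+    #       return any(k in name for k in keywords)
+    #
+    #   param_groups = [
+    #       {'params': [p for n, p in model.named_parameters()
+    #                   if not match_name_keywords(n, args.lr_linear_proj_names)],
+    #        'lr': args.lr},
+    #       {'params': [p for n, p in model.named_parameters()
+    #                   if match_name_keywords(n, args.lr_linear_proj_names)],
+    #        'lr': args.lr * args.lr_linear_proj_mult},
+    #   ]
+    #   optimizer = torch.optim.AdamW(param_groups, lr=args.lr, weight_decay=args.weight_decay)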
parser.add_argument('--lr_linear_proj_mult', default=0.1, type=float)
+
+    # Variants of Deformable DETR
+    parser.add_argument('--with_box_refine', default=False, action='store_true')
+    parser.add_argument('--transformer_input_type', default='queries', choices=['gt_proposals', 'prior_proposals', 'learnt_proposals', 'queries'])
+
+    # * Backbone
+    parser.add_argument('--backbone', default=None, type=str,
+                        help="Name of the convolutional backbone to use")
+    parser.add_argument('--dilation', action='store_true',
+                        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
+    parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
+                        help="Type of positional embedding to use on top of the image features")
+    parser.add_argument('--position_embedding_scale', default=2 * np.pi, type=float,
+                        help="position / size * scale")
+    parser.add_argument('--num_feature_levels', default=4, type=int, help='number of feature levels')
+
+    # * Transformer
+
+    parser.add_argument('--nheads', default=8, type=int,
+                        help="Number of attention heads inside the transformer's attentions")
+    parser.add_argument('--dec_n_points', default=4, type=int)
+    parser.add_argument('--enc_n_points', default=4, type=int)
+
+    parser.add_argument('--share_caption_head', type=int, default=1)
+
+    parser.add_argument('--cap_nheads', default=8, type=int)
+    parser.add_argument('--cap_dec_n_points', default=4, type=int)
+    parser.add_argument('--cap_num_feature_levels', default=4, type=int)
+    parser.add_argument('--disable_mid_caption_heads', action='store_true')
+
+    # Loss
+    parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
+                        help="Disables auxiliary decoding losses (loss at each layer)")
+
+
+    # * Loss coefficients
+
+    parser.add_argument('--cls_loss_coef', default=2, type=float)
+    parser.add_argument('--self_iou_loss_coef', default=0.0, type=float)
+    parser.add_argument('--ref_rank_loss_coef', default=0.1, type=float)
+    parser.add_argument('--mil_loss_coef', default=1.0, type=float)
+    parser.add_argument('--focal_alpha', default=0.25, type=float)
+    parser.add_argument('--focal_gamma', default=2., type=float)
+
+
+    # ***************************** Event counter *****************************
+    parser.add_argument('--max_eseq_length', default=10, type=int)
+    parser.add_argument('--lloss_gau_mask', default=1, type=int)
+    parser.add_argument('--lloss_beta', default=1, type=float)
+
+    # scheduled sampling
+    parser.add_argument('--scheduled_sampling_start', type=int, default=-1,
+                        help='at what iteration to start decaying the gt probability')
+    parser.add_argument('--basic_ss_prob', type=float, default=0, help='initial ss prob')
+    parser.add_argument('--scheduled_sampling_increase_every', type=int, default=2,
+                        help='every how many iterations thereafter to increase the sampling probability')
+    parser.add_argument('--scheduled_sampling_increase_prob', type=float, default=0.05,
+                        help='how much to increase the prob at each step')
+    parser.add_argument('--scheduled_sampling_max_prob', type=float, default=0.25,
+                        help='maximum scheduled sampling prob')
+
+    # reranking
+    parser.add_argument('--ec_alpha', type=float, default=0.3)
+    parser.add_argument('--test', action='store_true', default=False)
+    args = parser.parse_args()
+
+    if args.cfg_path:
+        import_cfg(args.cfg_path, vars(args))
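+    # How config layering behaves (grounded in import_cfg below): the yml named by
+    # --cfg_path may itself declare a base_cfg_path; the base file is loaded first
+    # and then shallow-updated by the child's keys. Hypothetical example:
+    #
+    #   # base.yml            # child.yml
+    #   epoch: 25             base_cfg_path: base.yml
+    #   lr: 1.0e-4            epoch: 30
+    #
+    #   -> effective options: epoch=30 (child wins), lr=1e-4 (inherited)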
+    if args.random_seed:
+        import random
+        seed = int(random.random() * 1000)
+        new_id = args.id + '_seed{}'.format(seed)
+        save_folder = os.path.join(args.save_dir, new_id)
+        while os.path.exists(save_folder):
+            seed = int(random.random() * 1000)
+            new_id = args.id + '_seed{}'.format(seed)
+            save_folder = os.path.join(args.save_dir, new_id)
+        args.id = new_id
+        args.seed = seed
+
+    if args.debug:
+        args.id = 'debug_' + time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
+        args.save_checkpoint_every = 1
+        args.shuffle = 0
+
+    if args.caption_decoder_type == 'none':
+        assert args.caption_loss_coef == 0
+        assert args.set_cost_caption == 0
+
+    print("args.id: {}".format(args.id))
+    return args
+
+def import_cfg(cfg_path, args):
+    with open(cfg_path, 'r') as handle:
+        yml = yaml.load(handle, Loader=yaml.FullLoader)
+        if 'base_cfg_path' in yml:
+            base_cfg_path = yml['base_cfg_path']
+            import_cfg(base_cfg_path, args)
+        args.update(yml)
+
+if __name__ == '__main__':
+    opt = parse_opts()
+    print(opt)
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/CaptioningHead/LSTM.py b/yc2_univl/backup/pdvc/CaptioningHead/LSTM.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b44fae2e15520e0c09c298d233e686c9b45d36e
--- /dev/null
+++ b/yc2_univl/backup/pdvc/CaptioningHead/LSTM.py
@@ -0,0 +1,174 @@
+# This file contains the ShowAttendTell and AllImg models.
+
+# ShowAttendTell is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention
+# https://arxiv.org/abs/1502.03044
+
+# AllImg is a model where the
+# img feature is concatenated with the word embedding at every time step as the input of the lstm
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import pdb
+
+import numpy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import *
+
+class Captioner(nn.Module):
+    def __init__(self, opt):
+        super(Captioner, self).__init__()
+        self.opt = opt
+
+        self.vocab_size = opt.vocab_size
+        self.input_encoding_size = opt.input_encoding_size
+        self.rnn_size = opt.rnn_size
+        self.num_layers = opt.num_layers
+        self.drop_prob_lm = opt.drop_prob
+        self.max_caption_len = opt.max_caption_len
+
+        self.ss_prob = 0.0  # scheduled sampling probability
+        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
+
+        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
+        self.dropout = nn.Dropout(self.drop_prob_lm)
+
+        self.init_weights()
+
+    def init_weights(self):
+        initrange = 0.1
+        self.embed.weight.data.uniform_(-initrange, initrange)
+        self.logit.bias.data.fill_(0)
+        self.logit.weight.data.uniform_(-initrange, initrange)
+
+    def init_hidden(self, batch_size):
+        weight = next(self.parameters()).data
+        return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(),
+                weight.new(self.num_layers, batch_size, self.rnn_size).zero_())  # (h0, c0)
+
+    def build_loss(self, input, target, mask):
+        one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1)
+        max_len = input.shape[1]
+        output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6)
+        return output
+
+    def forward(self, event, clip, clip_mask, seq):
+        batch_size = clip.shape[0]
+
+        state = self.init_hidden(batch_size)
+        outputs = []
+        seq = seq.long()
+
+        for i in range(seq.size(1) - 1):
+            if self.training and i >= 1 and self.ss_prob > 0.0:  # otherwise no need to sample
+                sample_prob = clip.data.new(batch_size).uniform_(0, 1)
+                sample_mask = sample_prob < self.ss_prob
+                if sample_mask.sum() == 0:
+                    it = seq[:, i].clone()
+                else:
+                    sample_ind = sample_mask.nonzero().view(-1)
+                    it = seq[:, i].data.clone()
+                    prob_prev = torch.exp(outputs[-1].data)  # fetch prev distribution: shape Nx(M+1)
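+                    # Scheduled sampling: for the masked positions, replace the
+                    # ground-truth token with a word drawn from the model's own
+                    # previous softmax distribution, so training gradually matches
+                    # inference-time conditions (Bengio et al., 2015).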
+                    it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
+                    it = Variable(it, requires_grad=False)
+            else:
+                it = seq[:, i].clone()
+            # break if all the sequences have ended
+            if i >= 1 and seq[:, i].data.sum() == 0:
+                break
+
+            output, state = self.get_logprobs_state(it, event, clip, clip_mask, state)
+            outputs.append(output)
+
+        return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
+
+
+    def get_logprobs_state(self, it, event, clip, clip_mask, state):
+        xt = self.embed(it)
+        output, state = self.core(xt, event, clip, clip_mask, state)
+        logprobs = F.log_softmax(self.logit(self.dropout(output)), dim=1)
+        return logprobs, state
+
+    def sample(self, event, clip, clip_mask, opt={}):
+
+        sample_max = opt.get('sample_max', 1)
+        beam_size = opt.get('beam_size', 1)
+        temperature = opt.get('temperature', 1.0)
+
+        batch_size = clip.shape[0]
+
+        state = self.init_hidden(batch_size)
+
+        seq = []
+        seqLogprobs = []
+
+        for t in range(self.max_caption_len + 1):
+            if t == 0:  # input <bos>
+                it = clip.data.new(batch_size).long().zero_()
+            elif sample_max:
+                sampleLogprobs, it = torch.max(logprobs.data, 1)
+                it = it.view(-1).long()
+            else:
+                if temperature == 1.0:
+                    prob_prev = torch.exp(logprobs.data)  # fetch prev distribution: shape Nx(M+1)
+                else:
+                    # scale logprobs by temperature
+                    prob_prev = torch.exp(torch.div(logprobs.data, temperature))
+                it = torch.multinomial(prob_prev, 1)
+                sampleLogprobs = logprobs.gather(1, it)  # gather the logprobs at sampled positions
+                it = it.view(-1).long()  # and flatten indices for downstream processing
+
+            logprobs, state = self.get_logprobs_state(it, event, clip, clip_mask, state)
+
+            if t >= 1:
+                # stop when all sequences have finished
+                if t == 1:
+                    unfinished = it > 0
+                else:
+                    unfinished = unfinished & (it > 0)
+                if unfinished.sum() == 0:
+                    break
+                it = it * unfinished.type_as(it)
+                seq.append(it)  # seq[t] is the input at time step t+2
+                seqLogprobs.append(sampleLogprobs.view(-1))
+
+        if len(seq) == 0:
+            return [], []
+        return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
+
+class AllImgCore(nn.Module):
+    def __init__(self, opt):
+        super(AllImgCore, self).__init__()
+        self.input_encoding_size = opt.input_encoding_size
+        self.rnn_size = opt.rnn_size
+        self.num_layers = opt.num_layers
+        self.drop_prob_lm = opt.drop_prob
+        self.att_feat_size = opt.clip_context_dim
+
+        self.opt = opt
+        self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type
+        self.input_dim = self.decide_input_feats_dim()
+        self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim,
+                           self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm)
+        assert self.wordRNN_input_feats_type == 'C'
+
+    def decide_input_feats_dim(self):
+        dim = 0
+        if 'E' in self.wordRNN_input_feats_type:
+            dim += self.opt.event_context_dim
+        if 'C' in self.wordRNN_input_feats_type:
+            dim += self.opt.clip_context_dim
+        return dim
+
+    def forward(self, xt, event, clip, clip_mask, state):
+        # mean-pool the clip features over valid (unmasked) positions
+        input_feats = (clip * clip_mask.unsqueeze(2)).sum(1) / (clip_mask.sum(1, keepdims=True) + 1e-5)
+        output, state = self.rnn(torch.cat([xt, input_feats], 1).unsqueeze(0), state)
+        return output.squeeze(0), state
+
+
+class LightCaptioner(Captioner):
+    def __init__(self, opt):
+        super(LightCaptioner, self).__init__(opt)
+        self.core = AllImgCore(opt)
diff --git a/yc2_univl/backup/pdvc/CaptioningHead/LSTM_DSA.py b/yc2_univl/backup/pdvc/CaptioningHead/LSTM_DSA.py
new file mode 100644
index
0000000000000000000000000000000000000000..918fb0ccf89416929b4cee8c1deadd7c99d586ae --- /dev/null +++ b/yc2_univl/backup/pdvc/CaptioningHead/LSTM_DSA.py @@ -0,0 +1,289 @@ +# This file contains ShowAttendTell and AllImg model + +# ShowAttendTell(Soft attention) is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention +# https://arxiv.org/abs/1502.03044 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import * + +from pdvc.ops.modules import MSDeformAttnCap + +class Captioner(nn.Module): + def __init__(self, opt): + super(Captioner, self).__init__() + self.opt = opt + + self.vocab_size = opt.vocab_size + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.max_caption_len = opt.max_caption_len + + self.ss_prob = 0.0 # Schedule sampling probability + self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size) + + self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1) + self.dropout = nn.Dropout(self.drop_prob_lm) + + self.init_weights() + + def init_weights(self): + initrange = 0.1 + self.embed.weight.data.uniform_(-initrange, initrange) + self.logit.bias.data.fill_(0) + self.logit.weight.data.uniform_(-initrange, initrange) + + def init_hidden(self, batch_size): + weight = next(self.parameters()).data + return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(), + weight.new(self.num_layers, batch_size, self.rnn_size).zero_()) # (h0, c0) + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + max_len = input.shape[1] + output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output + + def build_prob(self, input, target, mask): + ''' + Calculate the sentence-level predicted prob for each GT sentence of each query + input: [num_sentence, max_length, num_words_voc] + ''' + # breakpoint() + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) # [num_sentence, max_length, num_words_voc] + max_len = input.shape[1] + # output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + return output + + def forward(self,hs, reference, others, cap_tensor): + seq = cap_tensor + vid_num, query_num, _ = hs.shape + assert vid_num == 1 + + reference_points = reference + input_flatten = others['memory'] + input_spatial_shapes = others['spatial_shapes'] + input_level_start_index = others['level_start_index'] + input_padding_mask = others['mask_flatten'] + if reference_points.shape[-1] == 2: + reference_points = reference_points[:, :, None] \ + * torch.stack([others['valid_ratios']]*2, -1)[:, None] + elif reference_points.shape[-1] == 1: + reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None, :, None] + + query = hs + batch_size = query.shape[1] + state = self.init_hidden(batch_size) + outputs = [] + raw_probs = [] + seq = seq.long() + + n_levels = self.core.n_levels + if n_levels < self.core.opt.num_feature_levels: + input_spatial_shapes = input_spatial_shapes[:n_levels] + input_level_start_index = input_level_start_index[:n_levels] + total_input_len = torch.prod(input_spatial_shapes, 
dim=1).sum()
+            input_flatten = input_flatten[:, :total_input_len]
+            input_padding_mask = input_padding_mask[:, :total_input_len]
+            reference_points = reference_points[:, :, :n_levels]
+
+        for i in range(seq.size(1) - 1):
+            if self.training and i >= 1 and self.ss_prob > 0.0:  # otherwise no need to sample
+                sample_prob = hs.new_zeros(batch_size).uniform_(0, 1)
+                sample_mask = sample_prob < self.ss_prob
+                if sample_mask.sum() == 0:
+                    it = seq[:, i].clone()
+                else:
+                    sample_ind = sample_mask.nonzero().view(-1)
+                    it = seq[:, i].data.clone()
+                    prob_prev = torch.exp(outputs[-1].data)  # fetch prev distribution: shape Nx(M+1)
+                    it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
+                    it = Variable(it, requires_grad=False)
+            else:
+                it = seq[:, i].clone()
+            # break if all the sequences have ended
+            if i >= 1 and seq[:, i].data.sum() == 0:
+                break
+
+            output, state, raw_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask)
+            outputs.append(output)
+            raw_probs.append(raw_prob)
+
+        if self.opt.refine_pseudo_box and self.training:
+            return torch.cat([_.unsqueeze(1) for _ in outputs], 1), torch.cat([_.unsqueeze(1) for _ in raw_probs], 1)
+
+        return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
+
+
+    def get_logprobs_state(self, it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask):
+        xt = self.embed(it)
+        output, state = self.core(xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask)
+        # compute the vocabulary logits once, so the log-probs and the raw logits
+        # below share the same dropout mask during training
+        logits = self.logit(self.dropout(output))
+        logprobs = F.log_softmax(logits, dim=1)
+        raw_probs = logits  # pre-softmax logits, shape [max_num_word, vocab_size+1]
+        return logprobs, state, raw_probs
+
+    def sample(self, hs, reference, others, opt={}):
+
+        vid_num, query_num, _ = hs.shape
+        assert vid_num == 1
+        batch_size = vid_num * query_num
+        sample_max = opt.get('sample_max', 1)
+        beam_size = opt.get('beam_size', 1)
+        temperature = opt.get('temperature', 1.0)
+
+        reference_points = reference
+        input_flatten = others['memory']
+        input_spatial_shapes = others['spatial_shapes']
+        input_level_start_index = others['level_start_index']
+        input_padding_mask = others['mask_flatten']
+        if reference_points.shape[-1] == 2:
+            reference_points = reference_points[:, :, None] \
+                               * torch.stack([others['valid_ratios']]*2, -1)[:, None]
+        elif reference_points.shape[-1] == 1:
+            reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None, :, None]
+        query = hs
+
+        n_levels = self.core.n_levels
+        if n_levels < self.core.opt.num_feature_levels:
+            input_spatial_shapes = input_spatial_shapes[:n_levels]
+            input_level_start_index = input_level_start_index[:n_levels]
+            total_input_len = torch.prod(input_spatial_shapes, dim=1).sum()
+            input_flatten = input_flatten[:, :total_input_len]
+            input_padding_mask = input_padding_mask[:, :total_input_len]
+            reference_points = reference_points[:, :, :n_levels]
+
+        state = self.init_hidden(batch_size)
+
+        seq = []
+        seqLogprobs = []
+
+        for t in range(self.max_caption_len + 1):
+            if t == 0:  # input <bos>
+                it = hs.data.new(batch_size).long().zero_()
+            elif sample_max:
+                sampleLogprobs, it = torch.max(logprobs.data, 1)
+                it = it.view(-1).long()
+            else:
+                if temperature == 1.0:
+                    prob_prev = torch.exp(logprobs.data)  # fetch prev distribution: shape Nx(M+1)
+                else:
+                    #
scale logprobs by temperature + prob_prev = torch.exp(torch.div(logprobs.data, temperature)) + it = torch.multinomial(prob_prev, 1) + sampleLogprobs = logprobs.gather(1, it) # gather the logprobs at sampled positions + it = it.view(-1).long() # and flatten indices for downstream processing + + logprobs, state, softmax_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask) + + if t >= 1: + # stop when all finished + if t == 1: + unfinished = it > 0 + else: + unfinished = unfinished & (it > 0) + if unfinished.sum() == 0: + break + it = it * unfinished.type_as(it) + seq.append(it) #seq[t] the input of t+2 time step + seqLogprobs.append(sampleLogprobs.view(-1)) + + if seq==[] or len(seq)==0: + return [],[] + return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1) + + +class ShowAttendTellCore(nn.Module): + + def __init__(self, opt): + super(ShowAttendTellCore, self).__init__() + self.input_encoding_size = opt.input_encoding_size + + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + #self.fc_feat_size = opt.fc_feat_size + self.att_feat_size = int(opt.clip_context_dim / opt.cap_nheads) + self.att_hid_size = opt.att_hid_size + + self.opt = opt + self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type + self.input_dim = opt.hidden_dim * 2 + + self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim , + self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm) + self.att_drop = nn.Dropout(0.5) + + d_model = opt.hidden_dim + self.n_levels = opt.cap_num_feature_levels + self.n_heads = opt.cap_nheads + self.n_points = opt.cap_dec_n_points + + self.deformable_att = MSDeformAttnCap(d_model, self.n_levels, self.n_heads, self.n_points) + + if self.att_hid_size > 0: + self.ctx2att = nn.Linear(self.att_feat_size, self.att_hid_size) + self.h2att = nn.Linear(self.rnn_size, self.att_hid_size) + self.alpha_net = nn.Linear(self.att_hid_size, 1) + + def get_input_feats(self, event, att_clip): + input_feats = [] + if 'E' in self.wordRNN_input_feats_type: + input_feats.append(event) + if 'C' in self.wordRNN_input_feats_type: + input_feats.append(att_clip) + input_feats = torch.cat(input_feats,1) + return input_feats + + def forward(self,xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask): + + joint_query = torch.cat((state[0][-1].unsqueeze(0), query), 2) + # (N_, N_q, C) + + N_, Lq_, L_, _ = reference_points.shape + + # (N_ * M_, D_, Lq_, L_* P_) + clip = self.deformable_att(joint_query, reference_points, input_flatten, input_spatial_shapes, + input_level_start_index, input_padding_mask) + clip = clip.reshape(N_, self.n_heads, -1, Lq_, self.n_levels * self.n_points).permute(0, 3, 1, 4, 2) + clip = clip.reshape(N_ * Lq_, self.n_heads, self.n_levels * self.n_points, self.att_feat_size) + att_size = self.n_levels * self.n_points + + att = self.ctx2att(clip) # (batch * att_size) * att_hid_size + att = att.view(-1, self.n_heads, att_size, self.att_hid_size) # batch * att_size * att_hid_size + att_h = self.h2att(state[0][-1]) # batch * att_hid_size + att_h = att_h.unsqueeze(1).unsqueeze(1).expand_as(att) # batch * att_size * att_hid_size + dot = att + att_h # batch * att_size * att_hid_size + dot = torch.tanh(dot) # batch * att_size * att_hid_size + dot = dot.view(-1, self.att_hid_size) # (batch * att_size) * att_hid_size + dot = 
self.alpha_net(dot) # (batch * att_size) * 1 + dot = dot.view(-1, att_size) # batch * att_size + + weight = F.softmax(dot, dim=1) + att_feats_ = clip.reshape(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size + att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size + att_res = att_res.reshape(N_ * Lq_, self.n_heads, self.att_feat_size).flatten(1) + input_feats = torch.cat((att_res.unsqueeze(0), query), 2) + # print(xt.shape, input_feats.shape, query.shape, reference_points.shape) + output, state = self.rnn(torch.cat([xt.unsqueeze(0), input_feats], 2), state) + + return output.squeeze(0), state + + +class LSTMDSACaptioner(Captioner): + def __init__(self, opt): + super(LSTMDSACaptioner, self).__init__(opt) + self.core = ShowAttendTellCore(opt) + diff --git a/yc2_univl/backup/pdvc/CaptioningHead/Puppet.py b/yc2_univl/backup/pdvc/CaptioningHead/Puppet.py new file mode 100644 index 0000000000000000000000000000000000000000..3051b3d3de863fefc196e08740e7d6d05474adfd --- /dev/null +++ b/yc2_univl/backup/pdvc/CaptioningHead/Puppet.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn + + +class PuppetCaptionModel(nn.Module): + def __init__(self, opt): + super(PuppetCaptionModel, self).__init__() + self.vocab_size = opt.vocab_size + self.opt = opt + self.puppet_layer= nn.Linear(1,1) + + def forward(self, event, clip, clip_mask, seq): + N, L = seq.shape + output = torch.zeros((N, L-1, self.vocab_size + 1), device=seq.device) + return output + + def sample(self, event, clip, clip_mask, opt={}): + N, _, C = clip.shape + output = torch.zeros((N, 3), device=clip.device) + prob = torch.zeros((N, 3), device=clip.device) + return output, prob + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + output = - (one_hot * input * mask[..., None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__init__.py b/yc2_univl/backup/pdvc/CaptioningHead/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55abd1cc8681971b0e498d5db23771053240029f --- /dev/null +++ b/yc2_univl/backup/pdvc/CaptioningHead/__init__.py @@ -0,0 +1,22 @@ +from .LSTM import LightCaptioner +from .Puppet import PuppetCaptionModel +from .LSTM_DSA import LSTMDSACaptioner + +def build_captioner(opt): + if opt.caption_decoder_type == 'none': + caption_embed = PuppetCaptionModel(opt) + + elif opt.caption_decoder_type == 'light': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LightCaptioner(opt) + + elif opt.caption_decoder_type == 'standard': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LSTMDSACaptioner(opt) + + else: + raise ValueError('caption decoder type is invalid') + return caption_embed + diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96e1403d966894f3897772ec3341693c9e1e2097 Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7505e3befe8da0cfc2e2cf4ad989639a7aad658 Binary files /dev/null and 
b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac28b0fdbaca42bce04d24e8200908e43ca3849d Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82aceccc9d18b389c1de136320f99a9d3948bc21 Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..204ecd5a71e01bd0a22222a738ac51abf7b3af9a Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86f06e3b6d2a72ca205a646c86a1e9309be235c6 Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7506f43c89c0c6345ffd3c53b53cd87d5c394cbc Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e0d650e0f33bbf2aa9248e89a8ac9ec8a76397b Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/UniVL.py b/yc2_univl/backup/pdvc/UniVL.py new file mode 100644 index 0000000000000000000000000000000000000000..c5a8bcf7f019968d8751bbbab0537295c77ebfdd --- /dev/null +++ b/yc2_univl/backup/pdvc/UniVL.py @@ -0,0 +1,238 @@ + +import os +import random +import numpy as np +from pathlib import Path +from pdvc.modules.modeling import UniVL +from pdvc.modules.tokenization import BertTokenizer +from transformers import AutoTokenizer, BertForPreTraining +import torch +import argparse + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +class UniVL_args(object): + def __init__(self) -> None: + self.do_pretrain = False + self.do_train = False + self.do_eval = True + self.train_csv = 'data/youcookii_singlef_train.csv' + self.val_csv = 'data/youcookii_singlef_val.csv' + self.data_path = 'data/youcookii_caption.pickle' + self.features_path = 'data/youcookii_videos_feature.pickle' + self.num_thread_reader = 1 + self.lr = 0.0001 + self.epochs = 20 + self.batch_size = 256 + self.batch_size_val = 3500 + self.lr_decay = 0.9 + self.n_display = 100 + self.video_dim = 1024 + self.seed = 42 + 
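+        # NOTE: max_words is 48 here, while the commented-out get_args() parser
+        # below defaults to 20 -- presumably raised to leave room for longer captions.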
self.max_words = 48 + self.max_frames = 100 + self.feature_framerate = 1 + self.margin = 0.1 + self.hard_negative_rate = 0.5 + self.negative_weighting = 1 + self.n_pair = 1 + self.output_dir = None + self.bert_model = "bert-base-uncased" + self.visual_model = "visual-base" + self.cross_model = "cross-base" + self.decoder_model = "decoder-base" + self.init_model = None + self.do_lower_case = True + self.warmup_proportion = 0.1 + self.gradient_accumulation_steps = 1 + self.n_gpu = 1 + self.cache_dir = "" + self.fp16 = False + self.fp16_opt_level = 'O1' + self.task_type = "retrieval" + self.datatype = "youcook" + self.world_size = 0 + self.local_rank = 0 + self.coef_lr = 0.1 + self.use_mil = False + self.sampled_use_mil = False + self.text_num_hidden_layers = 12 + self.visual_num_hidden_layers = 6 + self.cross_num_hidden_layers = 2 + self.decoder_num_hidden_layers = 3 + self.train_sim_after_cross = False + self.expand_msrvtt_sentences = False + self.batch_size = int(self.batch_size / self.gradient_accumulation_steps) + + def __repr__(self) -> str: + return str(self.__dict__) + + + + +# def get_args(description='UniVL on Retrieval Task'): +# parser = argparse.ArgumentParser(description=description) +# parser.add_argument("--do_pretrain", action='store_true', help="Whether to run training.") +# parser.add_argument("--do_train", action='store_true', help="Whether to run training.") +# parser.add_argument("--do_eval", action='store_true', default=True, help="Whether to run eval on the dev set.") + +# parser.add_argument('--train_csv', type=str, default='data/youcookii_singlef_train.csv', help='') +# parser.add_argument('--val_csv', type=str, default='data/youcookii_singlef_val.csv', help='') +# parser.add_argument('--data_path', type=str, default='data/youcookii_caption.pickle', help='data pickle file path') +# parser.add_argument('--features_path', type=str, default='data/youcookii_videos_feature.pickle', help='feature path') + +# parser.add_argument('--num_thread_reader', type=int, default=1, help='') +# parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate') +# parser.add_argument('--epochs', type=int, default=20, help='upper epoch limit') +# parser.add_argument('--batch_size', type=int, default=256, help='batch size') +# parser.add_argument('--batch_size_val', type=int, default=3500, help='batch size eval') +# parser.add_argument('--lr_decay', type=float, default=0.9, help='Learning rate exp epoch decay') +# parser.add_argument('--n_display', type=int, default=100, help='Information display frequence') +# parser.add_argument('--video_dim', type=int, default=1024, help='video feature dimension') +# parser.add_argument('--seed', type=int, default=42, help='random seed') +# parser.add_argument('--max_words', type=int, default=20, help='') +# parser.add_argument('--max_frames', type=int, default=100, help='') +# parser.add_argument('--feature_framerate', type=int, default=1, help='') +# parser.add_argument('--margin', type=float, default=0.1, help='margin for loss') +# parser.add_argument('--hard_negative_rate', type=float, default=0.5, help='rate of intra negative sample') +# parser.add_argument('--negative_weighting', type=int, default=1, help='Weight the loss for intra negative') +# parser.add_argument('--n_pair', type=int, default=1, help='Num of pair to output from data loader') + +# parser.add_argument("--output_dir", default=None, type=str, +# help="The output directory where the model predictions and checkpoints will be written.") +# 
parser.add_argument("--bert_model", default="bert-base-uncased", type=str, +# help="Bert pre-trained model") +# parser.add_argument("--visual_model", default="visual-base", type=str, required=False, help="Visual module") +# parser.add_argument("--cross_model", default="cross-base", type=str, required=False, help="Cross module") +# parser.add_argument("--decoder_model", default="decoder-base", type=str, required=False, help="Decoder module") +# parser.add_argument("--init_model", default=None, type=str, required=False, help="Initial model.") +# parser.add_argument("--do_lower_case", action='store_true', help="Set this flag if you are using an uncased model.") +# parser.add_argument("--warmup_proportion", default=0.1, type=float, +# help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% of training.") +# parser.add_argument('--gradient_accumulation_steps', type=int, default=1, +# help="Number of updates steps to accumulate before performing a backward/update pass.") +# parser.add_argument('--n_gpu', type=int, default=1, help="Changed in the execute process.") + +# parser.add_argument("--cache_dir", default="", type=str, +# help="Where do you want to store the pre-trained models downloaded from s3") + +# parser.add_argument('--fp16', action='store_true', +# help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit") +# parser.add_argument('--fp16_opt_level', type=str, default='O1', +# help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." +# "See details at https://nvidia.github.io/apex/amp.html") + +# parser.add_argument("--task_type", default="retrieval", type=str, help="Point the task `retrieval` to finetune.") +# parser.add_argument("--datatype", default="youcook", type=str, help="Point the dataset `youcook` to finetune.") + +# parser.add_argument("--world_size", default=0, type=int, help="distribted training") +# parser.add_argument("--local_rank", default=0, type=int, help="distribted training") +# parser.add_argument('--coef_lr', type=float, default=0.1, help='coefficient for bert branch.') +# parser.add_argument('--use_mil', action='store_true', help="Whether use MIL as Miech et. al. (2020).") +# parser.add_argument('--sampled_use_mil', action='store_true', help="Whether MIL, has a high priority than use_mil.") + +# parser.add_argument('--text_num_hidden_layers', type=int, default=12, help="Layer NO. of text.") +# parser.add_argument('--visual_num_hidden_layers', type=int, default=6, help="Layer NO. of visual.") +# parser.add_argument('--cross_num_hidden_layers', type=int, default=2, help="Layer NO. of cross.") +# parser.add_argument('--decoder_num_hidden_layers', type=int, default=3, help="Layer NO. 
of decoder.") + +# parser.add_argument('--train_sim_after_cross', action='store_true', help="Test retrieval after cross encoder.") +# parser.add_argument('--expand_msrvtt_sentences', action='store_true', help="") + +# args = parser.parse_args() + +# # Check paramenters +# if args.gradient_accumulation_steps < 1: +# raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( +# args.gradient_accumulation_steps)) +# if not args.do_train and not args.do_eval: +# raise ValueError("At least one of `do_train` or `do_eval` must be True.") + +# args.batch_size = int(args.batch_size / args.gradient_accumulation_steps) + +# return args + +def set_seed_logger(args): + # predefining random initial seeds + random.seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) # if you are using multi-GPU. + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + + # world_size = torch.distributed.get_world_size() + # torch.cuda.set_device(args.local_rank) + # args.world_size = world_size + + # if not os.path.exists(args.output_dir): + # os.makedirs(args.output_dir, exist_ok=True) + + return args + +def load_pretrained_UniVL(return_visual_encoder=False): + + args = UniVL_args() + args = set_seed_logger(args) + device, n_gpu = 'cuda', 1 + + init_model = '/cpfs01/user/liuhuabin/PDVC/pdvc/modules/univl.pretrained.bin' + model_state_dict = torch.load(init_model, map_location='cpu') + + # Prepare model + cache_dir = os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + if return_visual_encoder: + return model.bert, model.visual, model.normalize_video + else: + return model.bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + +# if __name__ == '__main__': +# device, n_gpu = 'cuda', 1 +# captions = ['I love you', 'you believe me'] + +# tokenizer_hg = AutoTokenizer.from_pretrained("bert-base-uncased") +# text_encoder_hg = tokenizer_hg(captions, return_tensors='pt', truncation=True, padding=True, max_length=20) +# text_encoder_hg = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_hg.items()} +# attention_mask = text_encoder_hg['attention_mask'] + +# args = UniVL_args() +# args = set_seed_logger(args) +# args.init_model = 'modules/univl.pretrained.bin' +# # tokenizer = build_UniVL_tokenizer() +# # input_ids = [] +# # for sent in captions: +# # sent = tokenizer.tokenize(sent) +# # sent = ['[CLS]'] + sent + ['[SEP]'] +# # input_ids += tokenizer.convert_tokens_to_ids(sent) +# model = load_pretrained_UniVL(args, device, n_gpu, args.local_rank, args.init_model) +# text_embed = model(**text_encoder_hg, output_all_encoded_layers=True)[0][-1] +# breakpoint() + +if __name__ == '__main__': + device, n_gpu = 'cuda', 1 + args = UniVL_args() + args = set_seed_logger(args) + args.init_model = 'modules/univl.pretrained.bin' + # tokenizer = build_UniVL_tokenizer() + # input_ids = [] + # for sent in captions: + # sent = tokenizer.tokenize(sent) + # sent = ['[CLS]'] + sent + ['[SEP]'] + # input_ids += tokenizer.convert_tokens_to_ids(sent) + model_bert, model_visual, video_normalizer = load_pretrained_UniVL(args, device, n_gpu, 
+    model_bert, model_visual, video_normalizer = load_pretrained_UniVL(return_visual_encoder=True)
+    inputs = torch.rand(2, 215, 1024)
+    video_mask = torch.ones(2, 215)
+    inputs = video_normalizer(inputs)
+    visual_embed = model_visual(inputs, video_mask, output_all_encoded_layers=True)[0][-1]
+
+    breakpoint()
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/__init__.py b/yc2_univl/backup/pdvc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..194ecd26a483cef3e67c0e5cd971d4f7784aac67
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-37.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a406cf3565bfcd54eddc5d19fbeae7bffd2d629
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-39.pyc b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d4a911fc83c9364bfc6b98dd5d3d5a4ed14f5e3f
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-39.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9cafe04379877ab0c87872ae9835aa9bdf4532a4
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6961cba44a3fa93be1463250d574c8d91411714f
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/criterion.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/criterion.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d08274f898128d993db3370b9307fabf56c98f6
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/criterion.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f48fdb961f47546c71e60e995699a206b62a4f6a
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d45de6e0f900d019a24e0f339e62874f2038557e
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6efd23cdeac69c752a715a184606139f2aded19b
Binary files /dev/null and
b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f80042e195d3ecda40db7fe17e8b2b6b8991a376 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/pdvc.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/pdvc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5958f3c996e09fc92224c0dfbc6f1585d0c2b6c Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/pdvc.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c50c9f41bc67334949478d72b69f998d849c9f37 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2b9fbde23c0b61d1377c3e8a2c9af095131c45d Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/util.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e503a7b7440cff82242de19b9d909ba99e5f803 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/util.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/util.cpython-39.pyc b/yc2_univl/backup/pdvc/__pycache__/util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df84303c83b25082e579d99e0bdbc7c05bf182ef Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/util.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a9e333fb5a96578c8f8c3017ccf7d80466fff6f Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff5a503c7efa463a12801a2f62599ed146e5ca93 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/base_encoder.py b/yc2_univl/backup/pdvc/base_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb150a62dbb709589ec5271fe1b11ec16adf8f8 --- /dev/null +++ b/yc2_univl/backup/pdvc/base_encoder.py @@ -0,0 +1,86 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+# Modified from DETR (https://github.com/facebookresearch/detr)
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+
+"""
+Base Encoder to create multi-level conv features and positional embedding.
+"""
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from misc.detr_utils.misc import NestedTensor
+from .position_encoding import PositionEmbeddingSine
+
+
+class BaseEncoder(nn.Module):
+    def __init__(self, num_feature_levels, vf_dim, hidden_dim):
+        super(BaseEncoder, self).__init__()
+        self.pos_embed = PositionEmbeddingSine(hidden_dim//2, normalize=True)
+        self.num_feature_levels = num_feature_levels
+        self.hidden_dim = hidden_dim
+
+        if num_feature_levels > 1:
+            input_proj_list = []
+            in_channels = vf_dim
+            input_proj_list.append(nn.Sequential(
+                nn.Conv1d(in_channels, hidden_dim, kernel_size=1),
+                nn.GroupNorm(32, hidden_dim),
+            ))
+            for _ in range(num_feature_levels - 1):
+                input_proj_list.append(nn.Sequential(
+                    nn.Conv1d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1),
+                    nn.GroupNorm(32, hidden_dim),
+                ))
+                in_channels = hidden_dim
+            self.input_proj = nn.ModuleList(input_proj_list)
+        else:
+            self.input_proj = nn.ModuleList([
+                nn.Sequential(
+                    # Conv1d (not Conv2d): the input is a (N, C, L) temporal feature
+                    # map, so a 2-D conv here would fail at the first forward pass
+                    nn.Conv1d(vf_dim, hidden_dim, kernel_size=1),
+                    nn.GroupNorm(32, hidden_dim),
+                )])
+
+        for proj in self.input_proj:
+            nn.init.xavier_uniform_(proj[0].weight, gain=1)
+            nn.init.constant_(proj[0].bias, 0)
+
+    def forward(self, vf, mask, duration):
+        # vf: (N, L, C), mask: (N, L), duration: (N)
+        vf = vf.transpose(1, 2)  # (N, L, C) --> (N, C, L)
+        vf_nt = NestedTensor(vf, mask, duration)
+        pos0 = self.pos_embed(vf_nt)
+
+        srcs = []
+        masks = []
+        poses = []
+
+        src0, mask0 = vf_nt.decompose()
+        srcs.append(self.input_proj[0](src0))
+        masks.append(mask0)
+        poses.append(pos0)
+        assert mask is not None
+
+        for l in range(1, self.num_feature_levels):
+            if l == 1:
+                src = self.input_proj[l](vf_nt.tensors)
+            else:
+                src = self.input_proj[l](srcs[-1])
+            m = vf_nt.mask
+            mask = F.interpolate(m[None].float(), size=src.shape[-1:]).to(torch.bool)[0]
+            pos_l = self.pos_embed(NestedTensor(src, mask, duration)).to(src.dtype)
+            srcs.append(src)
+            masks.append(mask)
+            poses.append(pos_l)
+        return srcs, masks, poses
+
+def build_base_encoder(args):
+    base_encoder = BaseEncoder(args.num_feature_levels, args.feature_dim, args.hidden_dim)
+    return base_encoder
diff --git a/yc2_univl/backup/pdvc/criterion.py b/yc2_univl/backup/pdvc/criterion.py
new file mode 100644
index 0000000000000000000000000000000000000000..d47eb41a6711be9904ad6c55d502572261ff73c9
--- /dev/null
+++ b/yc2_univl/backup/pdvc/criterion.py
@@ -0,0 +1,726 @@
+# ------------------------------------------------------------------------
+# Modified from DETR (https://github.com/facebookresearch/detr)
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+import copy
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from misc.detr_utils import box_ops
+from misc.detr_utils.misc import (accuracy, get_world_size,
+                                  is_dist_avail_and_initialized)
+
+class SetCriterion(nn.Module):
+    """ This class computes the loss for DETR.
+ The process happens in two steps: + 1) we compute hungarian assignment between ground truth boxes and the outputs of the model + 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) + """ + def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + losses: list of all the losses to be applied. See get_loss for list of available losses. + focal_alpha: alpha in Focal Loss + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.opt = opt + self.pseudo_box_aug = opt.pseudo_box_aug + self.refine_pseudo_box = opt.refine_pseudo_box + if ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + counter_class_rate =[0.0, 0.012703673018503175, 0.04915769124551229, 0.06489919911626622, 0.0740127036730185, 0.07346037006351837, 0.08064070698702017, + 0.07069870201601768, 0.07870753935376967, 0.07097486882076774, 0.06766086716376692, 0.0579950289975145, 0.05247169290251312, 0.03783485225075946, + 0.03534935100800884, 0.03203534935100801, 0.026788180060756697, 0.02236951118475559, 0.01988400994200497, 0.016570008285004142, 0.013256006628003313, + 0.00856117094725214, 0.006904170118751726, 0.005523336095001381, 0.004694835680751174, 0.0038663352665009665, 0.0027616680475006906, 0.0027616680475006906, + 0.0016570008285004142, 0.0016570008285004142, 0.0005523336095001381, 0.0008285004142502071, 0.0, 0.00027616680475006904, 0.0, 0.0, 0.00027616680475006904, + 0.0011046672190002762, 0.0, 0.0005523336095001381, 0.0, 0.0, 0.0005523336095001381] + else: + counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01, + 1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02, + 1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03, + 2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04, + 4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04, + 2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05] + self.counter_class_rate = torch.tensor(counter_class_rate) + + def loss_labels(self, outputs, targets, indices, num_boxes, log=True): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + indices, many2one_indices = indices + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1], + dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device) + target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1) + + target_classes_onehot = target_classes_onehot[:,:,:-1] + loss_ce = sigmoid_focal_loss(src_logits, 
target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1] + losses = {'loss_ce': loss_ce} + pred_count = outputs['pred_count'] + max_length = pred_count.shape[1] - 1 + counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets] + counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long) + counter_target_onehot = torch.zeros_like(pred_count) + counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1) + weight = self.counter_class_rate[:max_length + 1].to(src_logits.device) + + counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight) + losses['loss_counter'] = counter_loss + + return losses + + @torch.no_grad() + def loss_cardinality(self, outputs, targets, indices, num_boxes): + """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes + This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients + """ + pred_logits = outputs['pred_logits'] + device = pred_logits.device + tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) + # Count the number of predictions that are NOT "no-object" (which is the last class) + card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) + card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) + losses = {'cardinality_error': card_err} + return losses + + def loss_boxes(self, outputs, targets, indices, num_boxes): + """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss + targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 2] + The target boxes are expected in format (center, length), normalized by the image size. 
+ """ + indices, many2one_indices = indices + N = len(indices[-1][0]) + assert 'pred_boxes' in outputs + idx, idx2 = self._get_src_permutation_idx2(indices) + src_boxes = outputs['pred_boxes'][idx] + if self.opt.use_pseudo_box and self.training: + # print('use pseudo box') + target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0) + else: + # print('use gt box') + target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0) + loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') + + losses = {} + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou( + box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(target_boxes))) + losses['loss_giou'] = loss_giou.sum() / num_boxes + # print(src_boxes) + self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1) + sizes = [len(v[0]) for v in indices] + if sizes == [1]: + losses['loss_self_iou'] = self_iou + return losses + self_iou_split = 0 + for i, c in enumerate(self_iou.split(sizes, -1)): + cc = c.split(sizes, -2)[i] + self_iou_split += cc.sum() / (0.5 * (sizes[i]) * (sizes[i]-1)) + has_nan = False if torch.all(~torch.isnan(self_iou_split)) else True + has_inf = False if torch.all(torch.isfinite(self_iou_split)) else True + if has_nan or has_inf: + breakpoint() + losses['loss_self_iou'] = self_iou_split + + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_src_permutation_idx2(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + src_idx2 = torch.cat([src for (_, src) in indices]) + return (batch_idx, src_idx), src_idx2 + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + + + def get_jittered_box(self, box, box_jitter, box_aug_num=5, mode='random'): + # breakpoint() + box = box.unsqueeze(0) # (1,2) + if mode == 'random': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) + iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + min_keep_cnt = (box_aug_num-1) if (box_aug_num-1) < keep_idx.numel() else keep_idx.numel() + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]] + elif mode == 'random_new': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) 
+            iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box))
+            keep_idx = torch.where(iou.reshape(-1) > 0.1)[0]
+            min_keep_cnt = min(box_aug_num - 1, keep_idx.numel())
+            box_repeat = box.repeat(box_aug_num, 1)
+            box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]]
+        elif mode == 'uniform':
+            ratio_c = box_jitter
+            ratio_d = 0.048 / 2
+            scale_c = torch.tensor([-ratio_c, -ratio_c/2, -ratio_c/4, ratio_c/4, ratio_c/2, ratio_c])
+            scale_d = torch.tensor([-ratio_d, -ratio_d/2, ratio_d/2, ratio_d])
+            scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device)
+            scale_box = box + scale
+            scale_box = scale_box.clamp(min=0., max=1.)
+            iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box))
+            keep_idx = torch.where(iou.reshape(-1) > 0.1)[0]
+            unkeep_idx = torch.where(iou.reshape(-1) <= 0.1)[0]
+            if keep_idx.numel() < (box_aug_num - 1):
+                box_repeat = box.repeat(box_aug_num, 1)
+                box_repeat[:keep_idx.numel()] = scale_box[keep_idx]
+                random_indices = torch.randperm(unkeep_idx.size(0))[:(box_aug_num - 1 - keep_idx.numel())]
+                box_repeat[keep_idx.numel():(box_aug_num - 1)] = scale_box[unkeep_idx[random_indices]]
+            else:
+                box_repeat = box.repeat(box_aug_num, 1)
+                random_indices = torch.randperm(keep_idx.numel())[:(box_aug_num - 1)]
+                box_repeat[:box_aug_num - 1] = scale_box[keep_idx[random_indices]]
+        elif mode == 'uniform_old':
+            # Augment with a pre-defined grid of scales
+            ratio_c = box_jitter
+            ratio_d = box_jitter
+            scale_c = torch.linspace(1 - ratio_c, 1 + ratio_c, 4)
+            scale_d = torch.linspace(1 - ratio_d, 1 + ratio_d, 2)
+            scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device)  # 4 x 2 = 8 augmented boxes in total
+            scale_box = box * scale
+            scale_box = scale_box.clamp(min=0., max=1.)
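+            # Note: in this branch the IoU computed below is not used for filtering (the
+            # keep_idx line is commented out); box_aug_num - 1 of the 8 grid-scaled boxes
+            # are drawn at random without replacement, and the last slot keeps the original.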
+            iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box))
+            # keep_idx = torch.where(iou.reshape(-1) > 0.1)[0]
+            box_repeat = box.repeat(box_aug_num, 1)
+            random_indices = torch.randperm(scale_box.size(0))[:(box_aug_num - 1)]
+            box_repeat[:(box_aug_num - 1)] = scale_box[random_indices]
+        elif mode == 'random_range':
+            def batch_randomize_boxes(boxes, max_vary_range, num_samples=1):
+                # Get the centers and widths from the input boxes
+                centers = boxes[:, 0]
+                widths = boxes[:, 1]
+                # Generate random offsets for the left and right boundaries of each box
+                left_boundaries = centers - (widths / 2) - torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range)
+                right_boundaries = centers + (widths / 2) + torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range)
+
+                # Ensure that the boundaries stay within the [0, 1] range
+                left_boundaries = left_boundaries.clamp(0, 1)
+                right_boundaries = right_boundaries.clamp(0, 1)
+
+                # Calculate the new centers and widths
+                new_centers = (left_boundaries + right_boundaries) / 2
+                new_widths = right_boundaries - left_boundaries
+
+                # Revert to the original box wherever the new width is non-positive
+                is_negative = new_widths <= 0
+                new_widths = torch.where(is_negative, widths, new_widths)
+                new_centers = torch.where(is_negative, centers, new_centers)
+
+                # Create and return the new boxes tensor
+                new_boxes = torch.stack((new_centers, new_widths), dim=2)
+                return new_boxes.squeeze(0)
+            box_repeat = batch_randomize_boxes(box, box_jitter, box_aug_num)
+            if torch.isnan(box_repeat).any():
+                raise FloatingPointError('NaN in jittered boxes (random_range mode)')
+        elif mode == 'augment_width':  # the original width covers a 0.5 sigma range
+            import random
+            def augment_boxes_with_scale(boxes, scale, num_augments):
+                augmented_boxes = []
+                for _ in range(num_augments):
+                    center, width = boxes[0]
+                    # Generate a random scale factor with a more uniform distribution
+                    random_scale = scale ** random.uniform(-1, 1)
+                    new_width = width * random_scale
+                    if center + new_width / 2 > 1 or center - new_width / 2 < 0:
+                        new_width = width
+                    augmented_boxes.append([center, new_width])
+                augmented_boxes = torch.tensor(augmented_boxes, device=boxes.device)
+                return augmented_boxes
+            box_repeat = augment_boxes_with_scale(box, box_jitter, box_aug_num)
+        else:
+            raise NotImplementedError('Unsupported box augmentation mode: {}'.format(mode))
+        return box_repeat
+
+    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
+        loss_map = {
+            'labels': self.loss_labels,
+            'cardinality': self.loss_cardinality,
+            'boxes': self.loss_boxes,
+        }
+        assert loss in loss_map, f'do you really want to compute {loss} loss?'
+        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
+
+    def forward(self, outputs, targets, others=None, aug_num=None, aug_ratio=None):
+        """ This performs the loss computation.
+        Parameters:
+             outputs: dict of tensors, see the output specification of the model for the format
+             targets: list of dicts, such that len(targets) == batch_size.
+             The expected keys in each dict depend on the losses applied, see each loss' doc
+        """
+        outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs' and k != 'enc_outputs'}
+        if self.training and self.pseudo_box_aug:
+            targets_cp = copy.deepcopy(targets)
+            assert self.opt.use_pseudo_box
+            for i in range(len(targets_cp)):
+                boxes_aug = []
+                for j in range(len(targets_cp[i]['labels'])):
+                    pseudo_box = targets_cp[i]['boxes_pseudo'][j]
+                    pseudo_box_aug = self.get_jittered_box(pseudo_box, aug_ratio, aug_num, self.opt.pseudo_box_aug_mode)
+                    boxes_aug.append(pseudo_box_aug)
+                targets_cp[i]['boxes_pseudo'] = torch.cat(boxes_aug, dim=0)
+                targets_cp[i]['labels'] = targets_cp[i]['labels'].unsqueeze(dim=1).repeat(1, aug_num).reshape(-1)
+                targets[i]['box_pseudo_aug'] = torch.cat(boxes_aug, dim=0)
+            # Retrieve the matching between the outputs of the last layer and the targets
+            last_indices = self.matcher(outputs_without_aux, targets_cp)
+        else:
+            targets_cp = targets
+            last_indices = self.matcher(outputs_without_aux, targets)
+        outputs['matched_indices'] = last_indices
+
+        num_boxes = sum(len(t["labels"]) for t in targets_cp)
+        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
+        if is_dist_avail_and_initialized():
+            torch.distributed.all_reduce(num_boxes)
+        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
+
+        # Compute all the requested losses
+        losses = {}
+        for loss in self.losses:
+            kwargs = {}
+            losses.update(self.get_loss(loss, outputs, targets_cp, last_indices, num_boxes, **kwargs))
+
+        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
+        if 'aux_outputs' in outputs:
+            aux_indices = []
+            for i, aux_outputs in enumerate(outputs['aux_outputs']):
+                indices = self.matcher(aux_outputs, targets_cp)
+                aux_indices.append(indices)
+                for loss in self.losses:
+                    if loss == 'masks':
+                        # Intermediate masks losses are too costly to compute, we ignore them.
+                        continue
+                    kwargs = {}
+                    if loss == 'labels':
+                        # Logging is enabled only for the last layer
+                        kwargs['log'] = False
+                    l_dict = self.get_loss(loss, aux_outputs, targets_cp, indices, num_boxes, **kwargs)
+                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
+                    losses.update(l_dict)
+
+            return losses, last_indices, aux_indices
+        return losses, last_indices
+
+class AlignCriterion(nn.Module):
+    """ This class computes the loss for DETR.
+    The process happens in two steps:
+        1) we compute the DTW assignment between the ground-truth captions and the output object queries
+        2) we supervise each pair of matched ground-truth / prediction (supervise class)
+    """
+    def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}):
+        """ Create the criterion.
+        Parameters:
+            num_classes: number of object categories, omitting the special no-object category
+            matcher: module able to compute a matching between targets and proposals
+            weight_dict: dict containing as key the names of the losses and as values their relative weight.
+            losses: list of all the losses to be applied. See get_loss for list of available losses.
+            focal_alpha: alpha in Focal Loss
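+
+        Note (illustrative): the matcher is expected to return per-video index pairs such as
+        (tensor([2, 5, 7]), tensor([0, 1, 2])), matching object queries 2, 5 and 7 to
+        captions 0, 1 and 2 respectively.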
+        """
+        super().__init__()
+        self.num_classes = num_classes
+        self.matcher = matcher
+        self.weight_dict = weight_dict
+        self.losses = losses
+        self.focal_alpha = focal_alpha
+        self.focal_gamma = focal_gamma
+        self.opt = opt
+        counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01,
+                              1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02,
+                              1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03,
+                              2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04,
+                              4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04,
+                              2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00,
+                              0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05]
+        self.counter_class_rate = torch.tensor(counter_class_rate)
+
+    def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
+        """Classification loss (NLL)
+        Compute the classification loss and the counter loss.
+        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
+        """
+        indices, many2one_indices = indices
+        assert 'pred_logits' in outputs
+        src_logits = outputs['pred_logits']
+        idx = self._get_src_permutation_idx(indices)
+        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
+        target_classes = torch.full(src_logits.shape[:2], self.num_classes,
+                                    dtype=torch.int64, device=src_logits.device)
+        target_classes[idx] = target_classes_o
+
+        target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1],
+                                            dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device)
+        target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1)
+
+        target_classes_onehot = target_classes_onehot[:, :, :-1]
+        loss_ce = sigmoid_focal_loss(src_logits, target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1]
+        losses = {'loss_ce': loss_ce}
+
+        pred_count = outputs['pred_count']
+        max_length = pred_count.shape[1] - 1
+        counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets]
+        counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long)
+        counter_target_onehot = torch.zeros_like(pred_count)
+        counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1)
+        weight = self.counter_class_rate[:max_length + 1].to(src_logits.device)
+        counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight)
+        losses['loss_counter'] = counter_loss
+
+        return losses
+
+    def loss_boxes(self, outputs, targets, indices, num_boxes):
+        # With pseudo boxes, regress the predicted boundaries towards them; otherwise apply an
+        # ordering loss and a temporal self-IoU loss that encourage the N predicted boundaries
+        # to be diverse and non-overlapping.
+        # outputs: (bsz, num_query, 2)
+        indices, many2one_indices = indices
+        idx, idx2 = self._get_src_permutation_idx2(indices)
+        src_boxes = outputs['pred_boxes'][idx]  # (num_boxes, 2)
+        avg_duration = torch.mean(src_boxes[:, 1])
+        center_point = src_boxes[:, 0]
+        N = len(indices[-1][0])
+
+        losses = {}
+
+        if self.opt.use_pseudo_box and self.training:
+            # Pseudo ground-truth boxes generated from alignment serve as the target boxes
+            target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0)
+            loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')
+            losses['loss_bbox'] = loss_bbox.sum() / num_boxes
+
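+            # gIoU for 1-D segments is IoU minus the fraction of the smallest enclosing
+            # interval left uncovered, e.g. (center, length) boxes (0.2, 0.2) and (0.8, 0.2)
+            # span [0.1, 0.3] and [0.7, 0.9], giving IoU 0 and gIoU 0 - (0.8 - 0.4)/0.8 = -0.5.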
+            loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(
+                box_ops.box_cl_to_xy(src_boxes),
+                box_ops.box_cl_to_xy(target_boxes)))
+            losses['loss_giou'] = loss_giou.sum() / num_boxes
+
+        if not self.opt.use_pseudo_box:
+            ## Sequence ordering loss
+            rank_margin = 0.01
+            pairs = torch.combinations(torch.arange(center_point.size(0)), 2)
+            rank_dist = center_point[pairs[:, 0]] - center_point[pairs[:, 1]]
+            # Make sure that the center points are ordered (hinge loss with a margin)
+            rank_loss = torch.relu(rank_margin + rank_dist).mean()
+
+            losses['loss_ref_rank'] = rank_loss
+
+            ## Self-IoU loss
+            prior_duration = 0.06
+            self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes),
+                                                  box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1)
+            sizes = [len(v[0]) for v in indices]
+            self_iou_split = 0
+            for i, c in enumerate(self_iou.split(sizes, -1)):
+                cc = c.split(sizes, -2)[i]
+                self_iou_split += cc.sum() / (0.5 * sizes[i] * (sizes[i] - 1))
+            duration_constraint = torch.abs(prior_duration / (avg_duration + 1e-6) - 1)
+            self_iou_split += duration_constraint
+
+            losses['loss_self_iou'] = self_iou_split
+
+        return losses
+
+    @torch.no_grad()
+    def loss_cardinality(self, outputs, targets, indices, num_boxes):
+        """ Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes.
+        This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
+        """
+        pred_logits = outputs['pred_logits']
+        device = pred_logits.device
+        tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device)
+        # Count the number of predictions that are NOT "no-object" (which is the last class)
+        card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1)
+        card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
+        losses = {'cardinality_error': card_err}
+        return losses
+
+    def _get_src_permutation_idx(self, indices):
+        # permute predictions following indices
+        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
+        src_idx = torch.cat([src for (src, _) in indices])
+        return batch_idx, src_idx
+
+    def _get_src_permutation_idx2(self, indices):
+        # permute predictions following indices
+        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
+        src_idx = torch.cat([src for (src, _) in indices])
+        src_idx2 = torch.cat([src for (_, src) in indices])
+        return (batch_idx, src_idx), src_idx2
+
+    def _get_tgt_permutation_idx(self, indices):
+        # permute targets following indices
+        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
+        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
+        return batch_idx, tgt_idx
+
+    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
+        loss_map = {
+            'labels': self.loss_labels,
+            'boxes': self.loss_boxes,
+            'cardinality': self.loss_cardinality,
+        }
+        assert loss in loss_map, f'do you really want to compute {loss} loss?'
+        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
+
+    def forward(self, outputs, targets, others):
+        """ This performs the loss computation.
+        Parameters:
+             outputs: dict of tensors, see the output specification of the model for the format
+             targets: list of dicts, such that len(targets) == batch_size.
+             The expected keys in each dict depend on the losses applied, see each loss' doc
+        """
+        text_embed = others['text_embed']    # num_dec_layers, num_sentence, dim
+        event_embed = others['event_embed']  # num_dec_layers, num_query, dim
+        dim = event_embed.shape[-1]
+
+        # Retrieve the matching between the outputs of the last layer and the targets
+        # if self.opt.matcher_type == 'DTW':
+        #     last_indices = self.matcher(text_embed[-1], event_embed[-1].reshape(-1, dim))
+        # elif self.opt.matcher_type == 'Sim':
+        #     last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim))
+        # else:
+        #     raise NotImplementedError('Align Criterion does not support: {}'.format(self.opt.matcher_type))
+        last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim))
+        outputs['matched_indices'] = last_indices
+
+        num_boxes = sum(len(t["labels"]) for t in targets)
+        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
+        if is_dist_avail_and_initialized():
+            torch.distributed.all_reduce(num_boxes)
+        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
+        # Compute all the requested losses
+        losses = {}
+        for loss in self.losses:
+            kwargs = {}
+            losses.update(self.get_loss(loss, outputs, targets, last_indices, num_boxes, **kwargs))
+
+        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
+        if 'aux_outputs' in outputs:
+            aux_indices = []
+            for i, aux_outputs in enumerate(outputs['aux_outputs']):
+                indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim))
+                aux_indices.append(indices)
+                for loss in self.losses:
+                    kwargs = {}
+                    if loss == 'labels':
+                        # Logging is enabled only for the last layer
+                        kwargs['log'] = False
+                    l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
+                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
+                    losses.update(l_dict)
+
+            return losses, last_indices, aux_indices
+        return losses, last_indices
+
+class ContrastiveCriterion(nn.Module):
+    '''
+    Contrastive loss between event features and caption features
+    '''
+
+    def __init__(self, temperature=0.1, enable_cross_video_cl=False, enable_e2t_cl=False, enable_bg_for_cl=False):
+        super().__init__()
+        self.temperature = temperature
+        self.enable_cross_video_cl = enable_cross_video_cl
+        self.enable_e2t_cl = enable_e2t_cl
+        self.enable_bg_for_cl = enable_bg_for_cl
+
+    def forward_logits(self, text_embed, event_embed, bg_embed=None):
+        normalized_text_emb = F.normalize(text_embed, p=2, dim=1)
+        normalized_event_emb = F.normalize(event_embed, p=2, dim=1)
+        logits = torch.mm(normalized_text_emb, normalized_event_emb.t())
+        if bg_embed is not None:
+            bg_logits = torch.sum(normalized_event_emb * F.normalize(bg_embed, p=2), dim=1)
+            logits = torch.cat((logits, bg_logits.unsqueeze(0)), dim=0)
+        return logits
+
+    def forward(self, text_embed, event_embed, matching_indices, return_logits=False, bg_embed=None):
+        '''
+        :param text_embed: [(event_num, contrastive_hidden_size)], len = batch size;
+            total_event_number = sum of the event numbers over the batch
+        :param event_embed: (bsz, max_event_num, contrastive_hidden_size), which needs to be
+            flattened in this function
+        :param matching_indices: (bsz, event_num)
+        '''
+        batch_size, max_event_num, _ = event_embed.shape
+        event_embed, text_embed, gt_labels, gt_event_num = self._preprocess(event_embed, [text_embed], matching_indices)
+        raw_logits = self.forward_logits(text_embed, event_embed)
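+        # raw_logits is a (num_captions, num_events) matrix of cosine similarities between
+        # L2-normalized caption and event embeddings; the temperature below sharpens the
+        # softmax used in the InfoNCE-style cross-entropy losses.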
+        logits = raw_logits / self.temperature
+
+        if self.enable_cross_video_cl:
+            t2e_loss = F.cross_entropy(logits, gt_labels)
+            if self.enable_e2t_cl:
+                gt_label_matrix = torch.zeros(len(text_embed) + 1, len(event_embed), device=text_embed.device)
+                gt_label_matrix[torch.arange(len(gt_labels)), gt_labels] = 1
+                event_mask = gt_label_matrix.sum(dim=0) == 0
+                gt_label_matrix[-1, event_mask] = 1
+                e2t_gt_label = gt_label_matrix.max(dim=0)[1]
+                bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1)
+                e2t_logits = torch.cat((logits, bg_logits.unsqueeze(0) / self.temperature), dim=0)
+                if self.enable_bg_for_cl:
+                    e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label)
+                else:
+                    e2t_loss = F.cross_entropy(e2t_logits.t()[~event_mask], e2t_gt_label[~event_mask])
+                loss = 0.5 * (t2e_loss + e2t_loss)
+            else:
+                loss = t2e_loss
+        else:
+            loss = 0
+            base = 0
+            for i in range(batch_size):
+                current_gt_event_num = gt_event_num[i]
+                current_logits = logits[base: base + current_gt_event_num, i * max_event_num: (i + 1) * max_event_num]
+                current_gt_labels = gt_labels[base: base + current_gt_event_num]
+                t2e_loss = F.cross_entropy(current_logits, current_gt_labels)
+                if self.enable_e2t_cl:
+                    gt_label_matrix = torch.zeros(gt_event_num[i] + 1, max_event_num, device=text_embed.device)
+                    gt_label_matrix[torch.arange(current_gt_event_num), current_gt_labels] = 1
+                    event_mask = gt_label_matrix.sum(dim=0) == 0
+                    e2t_gt_label = gt_label_matrix.max(dim=0)[1]
+                    bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1)
+                    e2t_logits = torch.cat((current_logits, bg_logits.unsqueeze(0) / self.temperature), dim=0)
+                    if self.enable_bg_for_cl:
+                        e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label)
+                    else:
+                        e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label, ignore_index=len(text_embed), reduction='sum') / (1e-5 + sum(~event_mask))
+                    loss += 0.5 * (t2e_loss + e2t_loss)
+                else:
+                    loss += t2e_loss
+                base += current_gt_event_num
+            loss = loss / batch_size
+        if return_logits:
+            return loss, raw_logits
+        return loss
+
+    def _preprocess(self, event_embed, text_embed, matching_indices):
+        '''
+        Flatten event_embed across the batch and build the ground-truth labels
+
+        :param matching_indices: [(event_num, )] len = bsz
+        '''
+        batch_size, max_event_num, f_dim = event_embed.shape
+        gt_labels = []
+        text_features = []
+        gt_event_num = []
+        event_features = event_embed.view(-1, f_dim)
+        for i in range(batch_size):
+            base = i * max_event_num if self.enable_cross_video_cl else 0
+            feat_ids, cap_ids = matching_indices[i]
+            gt_event_num.append(len(feat_ids))
+            text_features.append(text_embed[i][cap_ids])
+            gt_labels.append(feat_ids + base)
+        text_features = torch.cat(text_features, dim=0)
+        gt_labels = torch.cat(gt_labels, dim=0)
+        gt_labels = gt_labels.to(event_embed.device)
+
+        return event_features, text_features, gt_labels, gt_event_num
+
+def cross_entropy_with_gaussian_mask(inputs, targets, opt, weight):
+    gau_mask = opt.lloss_gau_mask
+    beta = opt.lloss_beta
+
+    N_, max_seq_len = targets.shape
+    gaussian_mu = torch.arange(max_seq_len, device=inputs.device).unsqueeze(0).expand(max_seq_len,
+                                                                                      max_seq_len).float()
+    x = gaussian_mu.transpose(0, 1)
+    gaussian_sigma = 2
+    mask_dict = torch.exp(-(x - gaussian_mu) ** 2 / (2 * gaussian_sigma ** 2))
+    _, ind = targets.max(dim=1)
+    mask = mask_dict[ind]
+
+    loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none", weight=1 - weight)
+    if gau_mask:
+        coef = targets + ((1
- mask) ** beta) * (1 - targets) + else: + coef = targets + (1 - targets) + loss = loss * coef + loss = loss.mean(1) + return loss.mean() + +def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): + """ + Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + alpha: (optional) Weighting factor in range (0,1) to balance + positive vs negative examples. Default = -1 (no weighting). + gamma: Exponent of the modulating factor (1 - p_t) to + balance easy vs hard examples. + Returns: + Loss tensor + """ + + prob = inputs.sigmoid() + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") # with_logits func calculates sigmoid and CE jointly + p_t = prob * targets + (1 - prob) * (1 - targets) + loss = ce_loss * ((1 - p_t) ** gamma) + + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + loss = alpha_t * loss + + return loss.mean(1).sum() / num_boxes + +def regression_loss(inputs, targets, opt, weight): + inputs = F.relu(inputs) + 2 + max_id = torch.argmax(targets, dim=1) + if opt.regression_loss_type == 'l1': + loss = nn.L1Loss()(inputs[:, 0], max_id.float()) + elif opt.regression_loss_type == 'l2': + loss = nn.MSELoss()(inputs[:, 0], max_id.float()) + return loss \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/deformable_transformer.py b/yc2_univl/backup/pdvc/deformable_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9b742061b166e0badc41db80f5423b0e46a746 --- /dev/null +++ b/yc2_univl/backup/pdvc/deformable_transformer.py @@ -0,0 +1,496 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + +import copy +import math + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn.init import xavier_uniform_, constant_, normal_ + +from misc.detr_utils.misc import inverse_sigmoid +from pdvc.ops.modules import MSDeformAttn + + +class DeformableTransformer(nn.Module): + def __init__(self, d_model=256, nhead=8, + num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=1024, dropout=0.1, + activation="relu", return_intermediate_dec=False, + num_feature_levels=4, dec_n_points=4, enc_n_points=4, use_anchor=False): + super().__init__() + + self.d_model = d_model + self.nhead = nhead + self.use_anchor = use_anchor + + self.no_encoder = (num_encoder_layers == 0) + self.num_feature_levels = num_feature_levels + + encoder_layer = DeformableTransformerEncoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, enc_n_points) + self.encoder = DeformableTransformerEncoder(encoder_layer, num_encoder_layers) + + decoder_layer = DeformableTransformerDecoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, dec_n_points) + self.decoder = DeformableTransformerDecoder(decoder_layer, num_decoder_layers, return_intermediate_dec, d_model, use_anchor) + + self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model)) + + self.pos_trans = nn.Linear(d_model, d_model * 2) + self.pos_trans_norm = nn.LayerNorm(d_model * 2) + self.reference_points = nn.Linear(d_model, 1) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + for m in self.modules(): + if isinstance(m, MSDeformAttn): + m._reset_parameters() + # if not self.use_anchor: + xavier_uniform_(self.reference_points.weight.data, gain=1.0) + constant_(self.reference_points.bias.data, 0.) 
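+        # level_embed below is drawn from a standard normal distribution; the
+        # reference-point head keeps its Xavier-initialized weight with a zero bias.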
+ normal_(self.level_embed) + + + def get_proposal_pos_embed(self, proposals): + num_pos_feats = 256 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + # N, L, 2 + proposals = proposals.sigmoid() * scale + # N, L, 2, 256 + pos = proposals[:, :, :, None] / dim_t + # N, L, 2, 128, 2 + pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()), dim=4).flatten(2) + return pos + + def get_proposal_pos_embed_1d(self, proposals): + num_pos_feats = 512 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + + # N, L + proposals = proposals.sigmoid() * scale + # N, L, 512 + pos = proposals[:, None] / dim_t + + pos = torch.stack((pos[:, 0::2].sin(), pos[:, 1::2].cos()), dim=2).flatten(1) + return pos + + def get_valid_ratio(self, mask): + valid_ratio_L = torch.sum(~mask, 1).float() / mask.shape[1] + return valid_ratio_L + + def prepare_encoder_inputs(self, srcs, masks, pos_embeds): + # prepare input for encoder + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + temporal_shapes = [] + for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)): + """ + lvl: (bs, ) + src: (bs, c, L ) + mask: (bs, L) + pos_embed: (bs, d_m, L) + """ + bs, c, L = src.shape + temporal_shapes.append(L) + src = src.transpose(1, 2) # (bs, L, c) + pos_embed = pos_embed.transpose(1, 2) # #(bs, L, d_m) + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + mask_flatten.append(mask) + src_flatten = torch.cat(src_flatten, 1) # (lvl_num, bs, wh, c) + mask_flatten = torch.cat(mask_flatten, 1) # (lvl_num, bs, wh) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) # (lvl_num, bs, wh, d_m) + temporal_shapes = torch.as_tensor(temporal_shapes, dtype=torch.long, device=src_flatten.device) # (lvl_num, 2) + level_start_index = torch.cat((temporal_shapes.new_zeros((1,)), temporal_shapes.cumsum(0)[ + :-1])) # prod: [w0h0, w0h0+w1h1, w0h0+w1h1+w2h2, ...] 
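+        # e.g. temporal_shapes = [100, 50, 25] yields level_start_index = [0, 100, 150]:
+        # each feature level occupies a contiguous slice of the flattened temporal axis.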
+        valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks],
+                                   1)  # (bs, lvl_num), the valid (unpadded) fraction of each level, all values <= 1
+
+        return src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten
+
+    def forward_encoder(self, src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten,
+                        mask_flatten):
+        # encoder
+        if self.no_encoder:
+            memory = src_flatten
+        else:
+            memory = self.encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten,
+                                  mask_flatten)
+
+        return memory
+
+    def prepare_decoder_input_query(self, memory, query_embed):
+        bs, _, _ = memory.shape
+        query_embed, tgt = torch.chunk(query_embed, 2, dim=1)
+        query_embed = query_embed.unsqueeze(0).expand(bs, -1, -1)
+        tgt = tgt.unsqueeze(0).expand(bs, -1, -1)
+        reference_points = self.reference_points(query_embed).sigmoid()  # (bs, object_query, 1)
+        init_reference_out = reference_points  # (bs, object_query, 1)
+        return init_reference_out, tgt, reference_points, query_embed
+
+    def prepare_init_anchor_and_query(self, anchor_embed, hidden_dim, random_anchor_init=False, prior_anchor_duration_init=False, prior_duration=0.048):
+        num_queries = anchor_embed.weight.shape[0]
+        if random_anchor_init:
+            anchor_embed.weight.data[:, :1] = torch.linspace(0, 1, num_queries).unsqueeze(1)
+            anchor_embed.weight.data[:, :1] = inverse_sigmoid(anchor_embed.weight.data[:, :1])
+            print('Initialize the anchor center points with a uniform distribution')
+            # anchor_embed.weight.data[:, :1].requires_grad = False  # DAB-DETR freezes the anchor centers
+            anchor_embed.weight.data[:, :1].requires_grad = True  # here they are kept trainable
+        if prior_anchor_duration_init:
+            # TODO: add prior anchor duration initialization; the implementation below is not correct
+            torch.nn.init.constant_(anchor_embed.weight.data[:, 1:], prior_duration)
+            anchor_embed.weight.data[:, 1:] = inverse_sigmoid(anchor_embed.weight.data[:, 1:])
+            anchor_embed.weight.data[:, 1:].requires_grad = True
+            print('Initialize the anchor durations with: {}'.format(prior_duration))
+        reference_points = anchor_embed.weight.data.detach().clone().sigmoid().unsqueeze(0).expand(1, -1, -1)
+        topk_coords_unact = inverse_sigmoid(reference_points[0, :, 0])
+        query_embed = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed_1d(topk_coords_unact)))  # the position embedding is built from unsigmoided (logit-space) coordinates
+        return query_embed
+
+    def prepare_decoder_input_anchor(self, memory, query_anchor):
+        bs, _, _ = memory.shape
+        query_embed, anchor = query_anchor
+        position_embedding, tgt = torch.chunk(query_embed, 2, dim=1)
+        position_embedding = position_embedding.unsqueeze(0).expand(bs, -1, -1)
+        tgt = tgt.unsqueeze(0).expand(bs, -1, -1)
+        reference_points = anchor.sigmoid().unsqueeze(0).expand(bs, -1, -1)  # (bs, num_queries, 2)
+        init_reference_out = reference_points
+        return init_reference_out, tgt, reference_points, position_embedding
+
+    def prepare_decoder_input_prior(self, proposals, num_queries=100):
+        '''
+        :param proposals: (batch, num_sentence, 2)
+        '''
+        bs, _, _ = proposals.shape
+        # Uniformly generate normalized
coordinates according to number of sentences + reference_points_list = [] + for i in range(bs): + # Generate N-1 points from 0~1 for each sentence uniformly + ns = proposals[i].shape[0] # number of sentences + reference_points_c = torch.linspace(0,1, 2*ns+1, dtype=torch.float32, device=proposals.device) + reference_points_c = reference_points_c[1:-1:2] # (num_sentence,) + reference_points_d = torch.Tensor([1.0/ns]).to(proposals.device).repeat(ns) # (num_sentence,) + reference_points = torch.stack([reference_points_c, reference_points_d], -1) # (num_sentence, 2) + # Padding the reference point to the same length + + num_query_per_sentence = num_queries // ns + reference_points = reference_points.repeat(1, num_query_per_sentence).reshape(-1,2) # (num_queries, 2) + if num_queries % ns != 0: # Padding with zeros + num_padding = num_queries - num_query_per_sentence * ns + padding = torch.Tensor([[1.0, 1.0/ns]]).to(proposals.device).repeat(num_padding, 1) + reference_points = torch.cat([reference_points, padding], 0) + reference_points_list.append(reference_points) + reference_points = torch.stack(reference_points_list, 0) # (batch, num_queries, 2) + init_reference_out = reference_points[:,:,:1] + topk_coords_unact = inverse_sigmoid(reference_points) + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) + return init_reference_out, tgt, reference_points[:,:,:1], query_embed + + def prepare_decoder_input_proposal(self, gt_reference_points): + ''' + :param gt_reference_points: (batch, num_sentence, 2) + ''' + #breakpoint() + topk_coords_unact = inverse_sigmoid(gt_reference_points) + reference_points = gt_reference_points + init_reference_out = reference_points + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) # Split to query_embed and position_embed (bs, num_sentence, hidden_dim, 2) + return init_reference_out, tgt, reference_points, query_embed + + def forward_decoder(self, *kargs): + hs, inter_references_out = self.decoder(*kargs) + return hs, inter_references_out + + +class DeformableTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, temporal_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, temporal_shapes, level_start_index, + padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + 
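+# Shape sketch for one encoder-layer pass (illustrative; bs = batch size, L = total
+# flattened temporal length across levels, d_model = 256 by default):
+#   src               (bs, L, d_model)      flattened multi-scale features
+#   pos               (bs, L, d_model)      positional + level embeddings
+#   reference_points  (bs, L, n_levels, 1)  normalized 1-D sampling centers
+#   output            (bs, L, d_model)      same shape as src
+# Each sublayer is post-norm: MSDeformAttn -> dropout -> residual -> LayerNorm, then
+# Linear(d_model, d_ffn) -> activation -> dropout -> Linear(d_ffn, d_model) -> residual -> LayerNorm.
+# The DeformableTransformerEncoder below simply stacks num_layers of these layers.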
+class DeformableTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(temporal_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (L_) in enumerate(temporal_shapes): + ref = torch.linspace(0.5, L_ - 0.5, L_, dtype=torch.float32, device=device) + ref = ref.reshape(-1)[None] / (valid_ratios[:, None, lvl] * L_) + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * valid_ratios[:, None] + reference_points = reference_points[:,:,:,None] + return reference_points + + def forward(self, src, temporal_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(temporal_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, temporal_shapes, level_start_index, padding_mask) + + return output + + +class DeformableTransformerDecoderLayer(nn.Module): + def __init__(self, d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # cross attention + self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # self attention + self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout) + self.dropout2 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout3 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout4 = nn.Dropout(dropout) + self.norm3 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, tgt): + tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout4(tgt2) + tgt = self.norm3(tgt) + return tgt + + def forward(self, tgt, query_pos, reference_points, src, src_temporal_shapes, level_start_index, + src_padding_mask=None, query_mask=None): + # self attention + q = k = self.with_pos_embed(tgt, query_pos) + tgt2 = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), tgt.transpose(0, 1), key_padding_mask=~query_mask)[ + 0].transpose(0, 1) + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + + # cross attention + tgt2 = self.cross_attn(self.with_pos_embed(tgt, query_pos), + reference_points, + src, src_temporal_shapes, level_start_index, src_padding_mask) + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + + # ffn + tgt = self.forward_ffn(tgt) + return tgt + + +class DeformableTransformerDecoder(nn.Module): + def __init__(self, decoder_layer, num_layers, return_intermediate=False, d_model=256, use_anchor=False): + super().__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.return_intermediate = return_intermediate + # hack implementation for iterative bounding box refinement and two-stage Deformable DETR + self.bbox_head = None + self.use_anchor = use_anchor + self.d_model = d_model + # if use_anchor: + # self.anchor_head = MLP(d_model, d_model, d_model, 2) + # self.scale_head = MLP(d_model, d_model, d_model, 2) + + + def forward(self, tgt, reference_points, 
src, src_temporal_shapes, src_level_start_index, src_valid_ratios, + query_pos=None, src_padding_mask=None, query_padding_mask=None, disable_iterative_refine=False): + output = tgt + + intermediate = [] + intermediate_reference_points = [] + bs = tgt.shape[0] + for lid, layer in enumerate(self.layers): + if reference_points.shape[-1] == 2: + reference_points_input = reference_points[:, :, None] \ + * torch.stack([src_valid_ratios, src_valid_ratios], -1)[:, None] + else: + assert reference_points.shape[-1] == 1 + reference_points_input = reference_points[:, :, None] * src_valid_ratios[:, None, :, None] + # if self.use_anchor: + # query_sine_embed = gen_sineembed_for_position(reference_points_input[:,:,0,:], self.d_model) + # raw_query_pos = self.anchor_head(query_sine_embed) # num_query, bs, 256 + # query_scale_embed = self.scale_head(output) if lid != 0 else 1 + # query_pos = query_scale_embed * raw_query_pos + output = layer(output, query_pos, reference_points_input, src, src_temporal_shapes, src_level_start_index, + src_padding_mask, query_padding_mask) + + if self.use_anchor: + assert reference_points.shape[-1] == 2 + + # hack implementation for iterative bounding box refinement + if disable_iterative_refine: + reference_points = reference_points + else: + if (self.bbox_head is not None): + tmp = self.bbox_head[lid](output) + if reference_points.shape[-1] == 2: + new_reference_points = tmp + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + else: + assert reference_points.shape[-1] == 1 + new_reference_points = tmp + new_reference_points[..., :1] = tmp[..., :1] + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + reference_points = new_reference_points.detach() + else: + reference_points = reference_points + + if self.return_intermediate: + intermediate.append(output) + intermediate_reference_points.append(reference_points) + # breakpoint() + + if self.return_intermediate: + return torch.stack(intermediate), torch.stack(intermediate_reference_points) + + return output, reference_points + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") + + +def gen_sineembed_for_position(pos_tensor, d_model): + # n_query, bs, _ = pos_tensor.size() + # sineembed_tensor = torch.zeros(n_query, bs, 256) + hidden_dim = d_model // 2 + scale = 2 * math.pi + dim_t = torch.arange(hidden_dim, dtype=torch.float32, device=pos_tensor.device) + dim_t = 10000 ** (2 * (dim_t // 2) / hidden_dim) + x_embed = pos_tensor[:, :, 0] * scale + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + if pos_tensor.size(-1) == 1: + pos = pos_x + elif pos_tensor.size(-1) == 2: + w_embed = pos_tensor[:, :, 1] * scale + pos_w = w_embed[:, :, None] / dim_t + pos_w = torch.stack((pos_w[:, :, 0::2].sin(), pos_w[:, :, 1::2].cos()), dim=3).flatten(2) + + pos = torch.cat((pos_x, pos_w), dim=2) + else: + raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1))) + return pos + +class MLP(nn.Module): + """ Very simple multi-layer perceptron (also called FFN)""" + + def __init__(self, input_dim, hidden_dim, output_dim, 
num_layers): + super().__init__() + self.num_layers = num_layers + h = [hidden_dim] * (num_layers - 1) + self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + +def build_deforamble_transformer(args): + return DeformableTransformer( + d_model=args.hidden_dim, + nhead=args.nheads, + num_encoder_layers=args.enc_layers, + num_decoder_layers=args.dec_layers, + dim_feedforward=args.transformer_ff_dim, + dropout=args.transformer_dropout_prob, + activation="relu", + return_intermediate_dec=True, + num_feature_levels=args.num_feature_levels, + dec_n_points=args.dec_n_points, + enc_n_points=args.enc_n_points, + use_anchor=args.use_anchor) diff --git a/yc2_univl/backup/pdvc/dp/CFSA.py b/yc2_univl/backup/pdvc/dp/CFSA.py new file mode 100644 index 0000000000000000000000000000000000000000..135defd0c1a48435405a27e2cc12532d86b5d79a --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/CFSA.py @@ -0,0 +1,327 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, repeat +from torch.nn import CrossEntropyLoss +import math + +def compute_cls_loss(pred, labels, use_cosface=False): + if use_cosface: + # CosFace Loss + s, m = 30.0, 0.4 + cos_value = torch.diagonal(pred.transpose(0, 1)[labels]) + numerator = s * (cos_value - m) + excl = torch.cat([torch.cat((pred[i, :y], pred[i, y + 1:])).unsqueeze(0) for i, y in enumerate(labels)], dim=0) + denominator = torch.exp(numerator) + torch.sum(torch.exp(s * excl), dim=1) + L = numerator - torch.log(denominator) + loss = -torch.mean(L) + else: + # Softmax Loss + criterion = CrossEntropyLoss().cuda() + loss = criterion(pred, labels) + + return loss + + +def frame_blank_align_loss(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + blank2 = seq_features2[:, :1] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + K = 2 * step_num + 1 + sparse_seq_features2 = torch.cat((blank2, seq_features2[:, [5, 7, 8, 9, 11, 12, 13, 14], :]), dim=1) + pred = (torch.einsum('bic,bjc->bij', seq_features1, sparse_seq_features2) / math.sqrt(C)).log_softmax(-1) + + D_pre = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_pre[:, 0] = pred[:, 0, 0] + D_pre[:, 1] = pred[:, 0, 1] + + for t in range(1, T): + D_cur = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_cur[:, 0] = D_pre[:, 0] + pred[:, t, 0] + D_cur[:, 1] = torch.logsumexp(torch.stack([D_pre[:, 0], D_pre[:, 1]]), dim=0) + pred[:, t, 1] + + # blank term + blank_pre_ind = torch.arange(1, K, 2)[None, :].repeat(B, 1) + blank_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_pre_ind] + + blank_cur_ind = torch.arange(2, K, 2)[None, :].repeat(B, 1) + blank_cur = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_cur_ind] + + blank_log_prob = torch.logsumexp(torch.stack([blank_pre, blank_cur]), dim=0) + D_cur[:, 2:][:, ::2] = blank_log_prob + pred[:, t, 0][:, None].repeat(1, blank_log_prob.shape[-1]) + + # step term + step_prepre_ind = torch.arange(1, K, 2)[None, :-1].repeat(B, 1) + step_prepre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_prepre_ind] + + step_pre_ind = torch.arange(2, K, 2)[None, :-1].repeat(B, 1) + step_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_pre_ind] + + step_cur_ind = torch.arange(3, K, 2)[None, :].repeat(B, 1) + step_cur 
= D_pre[torch.arange(B, device=device).unsqueeze(-1), step_cur_ind] + + step_log_prob = torch.logsumexp(torch.stack([step_prepre, step_pre, step_cur]), dim=0) + D_cur[:, 2:][:, 1::2] = step_log_prob + pred[:, t, 2:] + D_pre = D_cur + + fsa_distance = -torch.logsumexp(D_cur[:, -2:], dim=-1) / 13 + loss = fsa_distance.mean(0) + + return loss + + +def consist_step_mining(seq_features1, seq_features2, step_num): + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num - 1 + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k >= 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + repeat_times1 = (segment1[:, 1:] - segment1[:, :-1]).flatten() + repeat_target1 = torch.arange(step_num, device=device).repeat((B, )) + step_index1 = repeat_target1.repeat_interleave(repeat_times1).reshape(B, T) + + repeat_times2 = (segment2[:, 1:] - segment2[:, :-1]).flatten() + repeat_target2 = torch.arange(step_num, device=device).repeat((B, )) + step_index2 = repeat_target2.repeat_interleave(repeat_times2).reshape(B, T) + + div_term = torch.exp(torch.arange(0, C, 2, device=device) * -(math.log(10000.0) / C)) + + pos_emb1 = torch.zeros(B, T, C, device=device) + pos_emb1[:, :, 0::2] = torch.sin(step_index1.unsqueeze(-1) * div_term) + pos_emb1[:, :, 1::2] = torch.cos(step_index1.unsqueeze(-1) * div_term) + + pos_emb2 = torch.zeros(B, T, C, device=device) + pos_emb2[:, :, 0::2] = torch.sin(step_index2.unsqueeze(-1) * div_term) + pos_emb2[:, :, 1::2] = torch.cos(step_index2.unsqueeze(-1) * div_term) + + return pos_emb1, pos_emb2, segment1[:, :-1]+1, segment2[:, :-1]+1 + + + +def consist_step_mining_train(seq_features1, seq_features2, step_num, pair_labels): + # seq_features1 = seq_features1[:, 1:] + # seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / 
torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1) + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = tmp.flatten(3).max(-1).values + D_ind[:, k] = tmp.flatten(3).max(-1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + final_result = D[:, :, T-1, T-1] + + video_seg1 = segment1[:, :-1] + 1 + video_seg2 = segment2[:, :-1] + 1 + + # loss_step = (-(pair_labels * final_result.max(dim=-1).values)).sum() + loss_step = -(pair_labels * final_result.max(dim=-1).values).mean() + + return loss_step, video_seg1, video_seg2 + + + +def consist_step_mining_inference(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + # pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + return segment1[:, :-1] + 1, segment2[:, :-1] + 1 + + +def step_align_loss(seq_features1, seq_features2): + B, T, C = seq_features1.shape + # the similarity matrix: 16 * 16 + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = 
torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=seq_features1.device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=pred.device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + i, j, a, b = torch.meshgrid(*[torch.arange(T, device=seq_features1.device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((a >= i) | (b >= j)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + # tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + final_result = D[:, :, T-1, T-1] + return -(final_result.max(dim=-1).values).mean(), final_result.max(dim=-1).indices, D_ind + + +def single_align_loss(seq_features1, seq_features2): + device = seq_features1.device + T, C = seq_features1.shape + pred = (torch.einsum('ic,jc->ij', seq_features1, seq_features2) / math.sqrt(C)).log_softmax(-1) + + ZERO_PAD = torch.zeros((1), device=device) + ONE_PAD = torch.ones((1), device=device) + S = seq_features2.shape[0] + + target = (torch.arange(S, device=device)) + + D_TABLE = ONE_PAD.log() + for t in range(T): + D_VEC_1 = torch.logsumexp(torch.stack([D_TABLE[1:t+1], D_TABLE[:-1][:t]]), 0) + pred[t, target[:t]] + D_VEC_2 = D_TABLE[t:t+1] + pred[t, target[t:t+1]] + D_TABLE = torch.cat([ZERO_PAD.log(), D_VEC_1, D_VEC_2], dim=-1) + # changed by hotel: remove " / s" + ctc_distance = -D_TABLE[S] + return ctc_distance + + +def frame2varstep_loss(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1).mean(-1) + + +def frame2varstep_dist(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) + + +def frame2learnedstep_dist(frame_feats1, step_feats2): + B, T, C = frame_feats1.shape + losses = [] + for batch in range(B): + frame_feat1 = frame_feats1[batch] + step_feat2 = step_feats2[batch] + # step_feat2 = step_feat2[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]] + frame_loss = single_align_loss(frame_feat1, step_feat2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) diff --git a/yc2_univl/backup/pdvc/dp/__init__.py b/yc2_univl/backup/pdvc/dp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc 
b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd28dccf2f11d713b40d4e237cb5a055bf54ca5d Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1197f07fc41ae6f41b581ebd13f30b674234acf4 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24490f9a2f7cc151dc46f67b4d4ae214dba5c47a Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b35d663b4275176bf9f37c5dff954afd66df0e6 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fe93fd162f629560d23a2791ff3dab2c276d70c Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f2ae8f9d246202b485f89aa690174225dc2e66e Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7d1ad496851d504c4b5de3cabed3465262cf89 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af2c6ca1bfc47fc34f69aaeee119c1c439fdea4b Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..214dc29706641783b09e447117f540f723ec6868 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d84ee83c249b2c327db4180485c62581e0bcb345 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ae95ac7acddab327941068e44fcc974789c6d059 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/dp_utils.py b/yc2_univl/backup/pdvc/dp/dp_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f1dcdb6e6cb0385b1862aff36c779cdda89cf563 --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/dp_utils.py @@ -0,0 +1,402 @@ +import numpy as np +import torch +import math + +from itertools import product +from torch import log, exp +import torch.nn.functional as F + + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def compute_all_costs( + z_features, + x_features, + gamma_xz, + drop_cost_type, + keep_percentile, + l2_normalize=False, + given_baseline_logits=None, + return_baseline=False, +): + """This function computes pairwise match and individual drop costs used in Drop-DTW + + Parameters + __________ + + z_features: torch.tensor of size [K, d] + features of the K steps + x_features: torch.tensor of size [N, d] + features of the N video clips + gamma_xz: float + softmax temperature used when turning similarities into match/drop probabilities + drop_cost_type: str + The type of drop cost definition, e.g., learnable or logits percentile. + keep_percentile: float in [0, 1] + if drop_cost_type == 'logit', defines drop (keep) cost threshold as logits percentile + l2_normalize: bool + whether to normalize clip and step features before computing the costs + """ + + if l2_normalize: + x_features = F.normalize(x_features, p=2, dim=1) + z_features = F.normalize(z_features, p=2, dim=1) + + sim = z_features @ x_features.T + + if drop_cost_type == "logit": + if keep_percentile > 1: + baseline_logit = sim.min().detach() - 1 + else: + k = max([1, int(torch.numel(sim) * keep_percentile)]) + baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach() + baseline_logits = baseline_logit.repeat([1, sim.shape[1]]) # making it of shape [1, N] + sims_ext = torch.cat([sim, baseline_logits], dim=0) + else: + assert False, f"No such drop mode {drop_cost_type}" + + softmax_sims = torch.nn.functional.softmax(sims_ext / gamma_xz, dim=0) + matching_probs, drop_probs = softmax_sims[:-1], softmax_sims[-1] + zx_costs = -torch.log(matching_probs + 1e-5) + drop_costs = -torch.log(drop_probs + 1e-5) + return zx_costs, drop_costs, drop_probs
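+ + +# Shape sketch for compute_all_costs (illustrative comment, not from the original file): +# with K=3 steps and N=10 clips, sim is [3, 10]; a baseline row holding the +# keep_percentile-th largest similarity is appended, and the softmax over dim 0 of the +# resulting [4, 10] matrix yields, per clip, match probabilities for every step plus a +# drop probability: +# z, x = torch.randn(3, 512), torch.randn(10, 512) +# zx_costs, drop_costs, drop_probs = compute_all_costs( +# z, x, gamma_xz=10.0, drop_cost_type="logit", keep_percentile=0.3, l2_normalize=True) +# # zx_costs: [3, 10], drop_costs: [10], drop_probs: [10]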
+ + +def compute_double_costs( + z_features, + x_features, + gamma_xz, + drop_cost_type, + keep_percentile, + l2_normalize=False, + return_baseline=False, +): + """This function computes pairwise match costs and drop costs for both sequences, used in double Drop-DTW + + Parameters + __________ + + z_features: torch.tensor of size [K, d] + features of the K steps + x_features: torch.tensor of size [N, d] + features of the N video clips + gamma_xz: float + softmax temperature used when turning similarities into match/drop probabilities + drop_cost_type: str + The type of drop cost definition, e.g., learnable or logits percentile. + keep_percentile: float in [0, 1] + if drop_cost_type == 'logit', defines drop (keep) cost threshold as logits percentile + l2_normalize: bool + whether to normalize clip and step features before computing the costs + """ + + z_features, frame_features = z_features, x_features + if l2_normalize: + x_features = F.normalize(frame_features, p=2, dim=1) + z_features = F.normalize(z_features, p=2, dim=1) + sim = z_features @ x_features.T + + if drop_cost_type == "logit": + k = max([1, int(torch.numel(sim) * keep_percentile)]) + baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach() + else: + assert False, f"No such drop mode {drop_cost_type}" + sim_ext = F.pad(sim, (0, 1, 0, 1), value=baseline_logit) + + softmax_sims = torch.nn.functional.softmax(sim_ext.reshape(-1) / gamma_xz, dim=0).reshape(sim_ext.shape) + matching_probs, x_drop_probs, z_drop_probs = softmax_sims[:-1, :-1], softmax_sims[-1, :-1], softmax_sims[:-1, -1] + zx_costs = -torch.log(matching_probs + 1e-5) + x_drop_costs = -torch.log(x_drop_probs + 1e-5) + z_drop_costs = -torch.log(z_drop_probs + 1e-5) + return zx_costs, x_drop_costs, z_drop_costs + + +class VarTable: + def __init__(self, dims, dtype=torch.float, device=device): + self.dims = dims + d1, d2, d_rest = dims[0], dims[1], dims[2:] + + self.vars = [] + for i in range(d1): + self.vars.append([]) + for j in range(d2): + var = torch.zeros(d_rest).to(dtype).to(device) + self.vars[i].append(var) + + def __getitem__(self, pos): + i, j = pos + return self.vars[i][j] + + def __setitem__(self, pos, new_val): + i, j = pos + if self.vars[i][j].sum() != 0: + assert False, "This cell has already been assigned. There must be a bug somewhere." + else: + self.vars[i][j] = self.vars[i][j] + new_val + + def show(self): + device, dtype = self[0, 0].device, self[0, 0].dtype + mat = torch.zeros(self.dims, dtype=dtype, device=device) + for dims in product(*[range(d) for d in self.dims]): + i, j, rest = dims[0], dims[1], dims[2:] + mat[dims] = self[i, j][rest] + return mat + + +def minGamma(inputs, gamma=1, keepdim=True): + """continuous relaxation of min defined in the D3TW paper, i.e. minGamma(a) = -gamma * log(sum_i exp(-a_i / gamma))""" + if type(inputs) == list: + if inputs[0].shape[0] == 1: + inputs = torch.cat(inputs) + else: + inputs = torch.stack(inputs, dim=0) + + if gamma == 0: + minG = inputs.min(dim=0, keepdim=keepdim) + else: + # log-sum-exp stabilization trick + zi = -inputs / gamma + max_zi = zi.max() + log_sum_G = max_zi + log(exp(zi - max_zi).sum(dim=0, keepdim=keepdim) + 1e-5) + minG = -gamma * log_sum_G + return minG + + +def minProb(inputs, gamma=1, keepdim=True): + if type(inputs) == list: + if inputs[0].shape[0] == 1: + inputs = torch.cat(inputs) + else: + inputs = torch.stack(inputs, dim=0) + + if gamma == 0: + minP = inputs.min(dim=0, keepdim=keepdim) + else: + probs = F.softmax(-inputs / gamma, dim=0) + minP = (probs * inputs).sum(dim=0, keepdim=keepdim) + return minP + + +def prob_min(values, gamma_min, logits=None): + logits = values if logits is None else logits + assert len(logits) == len(values), "Values and prob logits are of different length" + + if len(values) > 1: + values = torch.cat(values, dim=-1) + logits = torch.cat(logits, dim=-1) + else: + values = values[0] + logits = logits[0] + + if gamma_min > 0: + probs = F.softmax(-logits / gamma_min, dim=-1) + else: + probs = F.one_hot(logits.argmin(), logits.size(-1)) + + if values.dim() > probs.dim(): + probs = probs[..., None, :] + + out = (values * probs).sum(-1).to(values.dtype) + return out + + +def list_min(values, keys=None): + keys
= values if keys is None else keys + assert len(keys) == len(values), "Values and prob logits are of different length" + + if values[0].dim() == keys[0].dim() + 1: + dim = -2 + else: + dim = -1 + + if len(values) > 1: + values = torch.cat(values, dim=dim) + keys = torch.cat(keys, dim=-1) + else: + values = values[0] + keys = keys[0] + + onehot = F.one_hot(keys.argmin(-1), keys.size(-1)) + if values.dim() > keys.dim(): + onehot = onehot[..., None] + out = (values * onehot).sum(dim).to(values.dtype) + return out + + +def traceback(D): + i, j = np.array(D.shape) - 2 + p, q = [i], [j] + while (i > 0) or (j > 0): + tb = np.argmin((D[i, j], D[i, j + 1], D[i + 1, j])) + if tb == 0: + i -= 1 + j -= 1 + elif tb == 1: + i -= 1 + else: # (tb == 2): + j -= 1 + p.insert(0, i) + q.insert(0, j) + return np.array(p), np.array(q) + + +def diag_to_mat(diags, K, N): + mat = np.zeros([K, N]) - 123 + for d in range(len(diags)): + for r, v in enumerate(diags[d]): + j = min(d, N - 1) - r + i = d - j + mat[i, j] = v if v < 1e8 else np.inf + return mat + + +def pad_costs(zx_costs_list, drop_costs_list): + B = len(zx_costs_list) + Ns, Ks = [], [] + for i in range(B): + Ki, Ni = zx_costs_list[i].shape + if Ki >= Ni: + # in case the number of steps is greater than the number of frames, + # duplicate every frame and let the drops do the job. + mult = math.ceil(Ki / Ni) + zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1]) + drop_costs_list[i] = torch.stack([drop_costs_list[i]] * mult, dim=-1).reshape([-1]) + Ni *= mult + Ns.append(Ni) + Ks.append(Ki) + N, K = max(Ns), max(Ks) + + # preparing padded tables + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], [] + for i in range(B): + zx_costs = zx_costs_list[i] + drop_costs = drop_costs_list[i] + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # padding everything to the size of the largest N and K + row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device) + padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad])) + padded_drop_costs.append(torch.cat([drop_costs, row_pad])) + multirow_pad = torch.stack([row_pad + 9999999999] * Ks[i], dim=0) + padded_table = torch.cat([zx_costs, multirow_pad], dim=1) + rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + 9999999999 + padded_table = torch.cat([padded_table, rest_pad], dim=0) + padded_zx_costs.append(padded_table) + return padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks + + +def get_diag_coord_grid(B, d_len, num_states, d_idx): + """ + B - batch size + d - num_elements in the diagonal + num_states - number of states in DP table + d_idx - idx of the diagonal , used for marking + """ + r = torch.arange(d_len) + s = torch.arange(num_states) + d = torch.ones(d_len, num_states) * d_idx + mg = torch.stack([d, *torch.meshgrid(r, s)], dim=-1)[None, ...].repeat([B, 1, 1, 1]) + return mg + + +def diag_traceback(pointer, N, paths): + # getting rid of unnecessary elements in the batch + pointer = [int(l.item()) for l in pointer] + d, r, s = pointer + traceback = [pointer] + while d > 0: + new_pointer = [int(l.item()) for l in paths[d][r, s]] + traceback.append(new_pointer) + d, r, s = new_pointer + + # transform to rectangular coordinates + rectangular_traceback = [] + for d, r, s in traceback: + i = r + max(0, d - N + 1) + j = d - i + if i > 0 and j > 0: + rectangular_traceback.append((i, j, s)) + + return traceback, rectangular_traceback + + +def nw_diag_traceback(d, r, N, paths): + d, r = int(d.item()), int(r.item()) + traceback = [] + while d 
> 0: + d_1, s_1, s = [int(l.item()) for l in paths[d][r, 0]] + traceback.append((d, r, s)) + d, r = d_1, s_1 + + # transform to rectangular coordinates + rectangular_traceback = [] + for d, r, s in traceback: + i = r + max(0, d - N + 1) + j = d - i + if i > 0 and j > 0: + rectangular_traceback.append((i, j, s)) + + return traceback, rectangular_traceback + + +def compute_symmetric_cost(sim, keep_percentile=0.3): + k = max([1, int(torch.numel(sim) * keep_percentile)]) + baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach() + baseline_logits = baseline_logit.repeat([1, sim.shape[1]]) # making it of shape [1, N] + zx_costs = -sim + x_drop_costs = -baseline_logits.squeeze() + z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze() + return zx_costs, x_drop_costs, z_drop_costs + + +#============ Hack from model_utilis.py in StepFormer ============# + + + +def unique_softmax(sim, labels, gamma=1, dim=0): + assert sim.shape[0] == labels.shape[0] + labels = labels.detach().cpu().numpy() + _, unique_index, unique_inverse_index = np.unique(labels, return_index=True, return_inverse=True) + unique_sim = sim[unique_index] + unique_softmax_sim = torch.nn.functional.softmax(unique_sim / gamma, dim=dim) + softmax_sim = unique_softmax_sim[unique_inverse_index] + return softmax_sim + +def compute_masked_sims(z, x, z_pad_mask, x_pad_mask, l2_normalize=False, softmax_dim=None, gamma=None): + # z ~ [B, K, d], x ~ [B, N, d] + if l2_normalize: + z, x = F.normalize(z, dim=-1), F.normalize(x, dim=-1) + pad_sims = torch.einsum("bkd,bnd->bkn", z, x) + masked_sims = [] + for i in range(x.shape[0]): + masked_sim = pad_sims[i] + masked_sim = masked_sim if z_pad_mask is None else masked_sim[~z_pad_mask[i], :] + masked_sim = masked_sim if x_pad_mask is None else masked_sim[:, ~x_pad_mask[i]] + if softmax_dim is not None: + masked_sim = F.softmax(masked_sim / gamma, dim=softmax_dim) + masked_sims.append(masked_sim) + return masked_sims + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + + +def cosine_sim(x, z): + cos_sim_fn = torch.nn.CosineSimilarity(dim=1) + return cos_sim_fn(x[..., None], z.T[None, ...]) + + +def cos_dist(x, z): + cos_sim_fn = torch.nn.CosineSimilarity(dim=1) + return (1 - cos_sim_fn(x[..., None], z.T[None, ...])) / 2 + + +def l2_dist(x, z): + # pairwise distances between the rows of x [N, d] and the rows of z [M, d] + dist_squared = (x**2).sum(-1, keepdim=True) + (z**2).sum(-1)[None, :] - 2 * x @ z.T + return torch.clamp(dist_squared, min=0).sqrt() + + +def cos_loglikelihood(x, z, gamma=0.1, z_dim=1): + cos_sim = cosine_sim(x, z) + probs = F.softmax(cos_sim / gamma, dim=z_dim) + return torch.log(probs) \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/dp/exact_dp.py b/yc2_univl/backup/pdvc/dp/exact_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..ada874b89a60799af867aab82357c8d7b442348d --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/exact_dp.py @@ -0,0 +1,1123 @@ +import torch +import numpy as np +import torch.nn.functional as F +from functools import partial +from copy import copy + +# from dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min +from pdvc.dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min
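+ + +# Shared conventions for the DP routines in this file (summary comment): z denotes the +# K steps and x the N video clips; zx_costs[k, n] is the cost of matching step k to +# clip n, while the drop costs price leaving a clip (x) or a step (z) unmatched. Where +# a 4-state table appears, the states are {0: zx (match), 1: z- (x dropped), +# 2: -x (z dropped), 3: -- (both dropped)}. The batched *_machine variants sweep the +# DP table along anti-diagonals, updating all cells of one diagonal in parallel.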
+ + +def crosstask_dp(cost_matrix, exactly_one=True, bg_cost=0): + "Algorithm used in Cross-Task to calculate Recall" + + def get_step(k): + return 0 if k % 2 == 0 else int((k + 1) / 2) + + T = cost_matrix.shape[0] + K = cost_matrix.shape[1] + K_ext = int(2 * K + 1) + + L = -np.ones([T + 1, K_ext], dtype=float) + P = -np.ones([T + 1, K_ext], dtype=float) + L[0, 0] = 0 + P[0, 0] = 0 + + for t in range(1, T + 1): + Lt = L[t - 1, :] + Pt = P[t - 1, :] + for k in range(K_ext): + s = get_step(k) + opt_label = -1 + + j = k + if (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1 and (s == 0 or not exactly_one): + opt_label = j + opt_value = Lt[j] + + j = k - 1 + if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1: + opt_label = j + opt_value = L[t - 1][j] + + if s != 0: + j = k - 2 + if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1: + opt_label = j + opt_value = Lt[j] + + if s != 0: + L[t, k] = opt_value + cost_matrix[t - 1][s - 1] + else: + L[t, k] = opt_value + bg_cost + P[t, k] = opt_label + + labels = np.zeros_like(cost_matrix) + if L[T, K_ext - 1] < L[T, K_ext - 2] or (P[T, K_ext - 2] == -1): + k = K_ext - 1 + else: + k = K_ext - 2 + for t in range(T, 0, -1): + s = get_step(k) + if s > 0: + labels[t - 1, s - 1] = 1 + k = P[t, k].astype(int) + return labels + + +def iou_based_matching(pred_seg, gt_seg, pred_step_ids, gt_step_ids, ignore_class=True): + """Performs the matching of predicted and gt sequence segments""" + pred_segments = torch.stack([pred_seg == idx for idx in pred_step_ids], 0) # [N_pred, T] + gt_segments = torch.stack([gt_seg == idx for idx in gt_step_ids], 0) # [N_gt, T] + intersection = ( + torch.logical_and(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1) + ) # [N_pred, N_gt] + union = torch.logical_or(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1) # [N_pred, N_gt] + iou = intersection / (union + 1e-5) # [N_pred, N_gt] + + C = -iou.detach().cpu().numpy().T # [N_gt, N_pred] + if not ignore_class: + print("Not ignoring class") + is_same_step_id = pred_step_ids.unsqueeze(1) == gt_step_ids.unsqueeze(0) # [N_pred, N_gt] + if is_same_step_id.shape == (1, 1): + C[0, 0] += 9999 * (~is_same_step_id[0, 0]) + else: + C[~is_same_step_id] = 9999 + + x_drop, z_drop = np.zeros(C.shape[1]), np.zeros(C.shape[0]) + labels = double_drop_dtw(C, x_drop, z_drop, one_to_many=False, many_to_one=False, return_labels=True) - 1 + indices = (np.arange(len(labels))[labels > -1], labels[labels > -1]) + return [torch.as_tensor(i, dtype=torch.int64) for i in indices] + + +def drop_dtw(zx_costs, drop_costs, exclusive=True, contiguous=True, one_to_one=False, return_labels=False): + """Drop-DTW algorithm that allows drop only from one (video) side. See Algorithm 1 in the paper. + + Parameters + ---------- + zx_costs: np.ndarray [K, N] + pairwise match costs between K steps and N video clips + drop_costs: np.ndarray [N] + drop costs for each clip + exclusive: bool + If True any clip can be matched with only one step, not many. + contiguous: bool + if True, can only match a contiguous sequence of clips to a step + (i.e. no drops in between the clips) + return_labels: bool + if True, returns output directly useful for segmentation computation (made for convenience) + """ + K, N = zx_costs.shape + + # D: the dynamic programming table, which records the intermediate costs + # P: the path tracking table, which records the previous location and state (zi, xi, prev_state) + + # initialize solution matrices + D = np.zeros([K + 1, N + 1, 2]) # the 2 last dimensions correspond to different states. + # State (dim) 0 - x is matched; State 1 - x is dropped + D[1:, 0, :] = np.inf # no drops in z in any state + D[0, 1:, 0] = np.inf # no drops in x in state 0, i.e. state where x is matched + D[0, 1:, 1] = np.cumsum(drop_costs) # drop costs initialization in state 1 + + # initialize path tracking info for each state + P = np.zeros([K + 1, N + 1, 2, 3], dtype=int) # the last dimension records the previous location and state (zi, xi, prev_state) + for xi in range(1, N + 1): + P[0, xi, 1] = 0, xi - 1, 1 + # filling in the dynamic tables + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1] + diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states] + diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states] + + left_neigh_states = [0, 1] + left_neigh_coords = [(zi, xi - 1) for _ in left_neigh_states] + left_neigh_costs = [D[zi, xi - 1, s] for s in left_neigh_states] + + left_pos_neigh_states = [0] if contiguous else left_neigh_states + left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states] + left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states] # Drop between clips is not allowed when setting `contiguous==True` (one step to sparse clips is not allowed) + + top_pos_neigh_states = [0] + top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states] + top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexing in costs is shifted by 1 + + # state 0: matching x to z + neigh_states_pos = diag_neigh_states + neigh_coords_pos = diag_neigh_coords + neigh_costs_pos = diag_neigh_costs + if not one_to_one: + neigh_states_pos = neigh_states_pos + left_pos_neigh_states + neigh_coords_pos = neigh_coords_pos + left_pos_neigh_coords + neigh_costs_pos = neigh_costs_pos + left_pos_neigh_costs + if not exclusive: # exclusive=True indicates any clip can be matched with only one step, that is, path from top is not allowed + neigh_states_pos = neigh_states_pos + top_pos_neigh_states + neigh_coords_pos = neigh_coords_pos + top_pos_neigh_coords + neigh_costs_pos = neigh_costs_pos + top_pos_neigh_costs # keep the costs aligned with the states and coords lists above + + costs_pos = np.array(neigh_costs_pos) + zx_costs[z_cost_ind, x_cost_ind] # calculate cumulative cost in current step + opt_ind_pos = np.argmin(costs_pos) + P[zi, xi, 0] = *neigh_coords_pos[opt_ind_pos], neigh_states_pos[opt_ind_pos] # Records the last step's position (zi,xi) and state (0 or 1) + D[zi, xi, 0] = costs_pos[opt_ind_pos] # Update the minimal cumulative cost of selected path + + # state 1: x is dropped + costs_neg = np.array(left_neigh_costs) + drop_costs[x_cost_ind] + opt_ind_neg = np.argmin(costs_neg) + P[zi, xi, 1] = *left_neigh_coords[opt_ind_neg], left_neigh_states[opt_ind_neg] + D[zi, xi, 1] = costs_neg[opt_ind_neg] + + cur_state = D[K, N, :].argmin() + min_cost = D[K, N, cur_state] + + # backtracking the solution + zi, xi = K, N + path, labels = [], np.zeros(N) + x_dropped = [N] if cur_state == 1 else [] # the last clip is dropped iff the final state is the drop state + while not (zi == 0 and xi == 0): + path.append((zi, xi)) + zi_prev, xi_prev, prev_state = P[zi, xi, cur_state] + if xi > 0: + labels[xi - 1] = zi * (cur_state == 0) # either zi or 0 + if prev_state == 1: + x_dropped.append(xi_prev) + zi, xi, cur_state = zi_prev, xi_prev, prev_state + + if not return_labels: + return min_cost, D, path, x_dropped + else: + return labels
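+ + +# Minimal usage sketch (illustrative, with made-up numbers): +# costs = np.array([[0.1, 0.9, 0.9], +# [0.9, 0.1, 0.9]]) # K=2 steps, N=3 clips +# labels = drop_dtw(costs, np.full(3, 0.5), return_labels=True) +# # -> array([1., 2., 0.]): labels[i] is the 1-based step matched to clip i, and 0 +# # marks a dropped clip (here the last clip is cheaper to drop than to match)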
+ + +def double_drop_dtw( + pairwise_zx_costs, + x_drop_costs, + z_drop_costs, + contiguous=True, + one_to_many=True, + many_to_one=True, + return_labels=False, +): + """Drop-DTW algorithm that allows drops from both sequences. See Algorithm 1 in Appendix. + + Parameters + ---------- + pairwise_zx_costs: np.ndarray [K, N] + pairwise match costs between K steps and N video clips + x_drop_costs: np.ndarray [N] + drop costs for each clip + z_drop_costs: np.ndarray [K] + drop costs for each step + contiguous: bool + if True, can only match a contiguous sequence of clips to a step + (i.e. no drops in between the clips) + """
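+ # Illustrative call (made-up sizes): because drops are priced on both sides here, a +# step with no good clip match can itself be dropped at cost z_drop_costs[k]: +# labels = double_drop_dtw(np.random.rand(4, 12), np.full(12, 0.5), np.full(4, 0.5), +# return_labels=True) # labels[n] in {0, 1, ..., 4}, with 0 = clip n dropped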
+ K, N = pairwise_zx_costs.shape + + # initialize solution matrices + D = np.zeros([K + 1, N + 1, 4]) # the 4 dimensions are the following states: zx, z-, -x, -- + # no drops allowed in zx DP. Setting the same for all DPs to change later here. + D[1:, 0, :] = 99999999 + D[0, 1:, :] = 99999999 + D[0, 0, 1:] = 99999999 + # Allow to drop x in z- and -- + D[0, 1:, 1], D[0, 1:, 3] = np.cumsum(x_drop_costs), np.cumsum(x_drop_costs) + # Allow to drop z in -x and -- + D[1:, 0, 2], D[1:, 0, 3] = np.cumsum(z_drop_costs), np.cumsum(z_drop_costs) + + # initialize path tracking info for each of the 4 DP tables: + P = np.zeros([K + 1, N + 1, 4, 3], dtype=int) # (zi, xi, prev_state) + for zi in range(1, K + 1): + P[zi, 0, 2], P[zi, 0, 3] = (zi - 1, 0, 2), (zi - 1, 0, 3) + for xi in range(1, N + 1): + P[0, xi, 1], P[0, xi, 3] = (0, xi - 1, 1), (0, xi - 1, 3) + + # filling in the dynamic tables + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1, 2, 3] # zx, z-, -x, -- + diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states] + diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states] + + left_pos_neigh_states = [0, 1] # zx and z- + left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states] + left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states] + + top_pos_neigh_states = [0, 2] # zx and -x + top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states] + top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states] + + left_neg_neigh_states = [2, 3] # -x and -- + left_neg_neigh_coords = [(zi, xi - 1) for _ in left_neg_neigh_states] + left_neg_neigh_costs = [D[zi, xi - 1, s] for s in left_neg_neigh_states] + + top_neg_neigh_states = [1, 3] # z- and -- + top_neg_neigh_coords = [(zi - 1, xi) for _ in top_neg_neigh_states] + top_neg_neigh_costs = [D[zi - 1, xi, s] for s in top_neg_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexing in costs is shifted by 1 + + # DP 0: coming to zx + neigh_states_zx = diag_neigh_states + neigh_coords_zx = diag_neigh_coords + neigh_costs_zx = diag_neigh_costs + if one_to_many: + if contiguous: + neigh_states_zx.extend(left_pos_neigh_states[0:1]) + neigh_coords_zx.extend(left_pos_neigh_coords[0:1]) + neigh_costs_zx.extend(left_pos_neigh_costs[0:1]) + else: + neigh_states_zx.extend(left_pos_neigh_states) + neigh_coords_zx.extend(left_pos_neigh_coords) + neigh_costs_zx.extend(left_pos_neigh_costs) + if many_to_one: + neigh_states_zx.extend(top_pos_neigh_states) + neigh_coords_zx.extend(top_pos_neigh_coords) + neigh_costs_zx.extend(top_pos_neigh_costs) + + costs_zx = np.array(neigh_costs_zx) + pairwise_zx_costs[z_cost_ind, x_cost_ind] + opt_ind_zx = np.argmin(costs_zx) + P[zi, xi, 0] = *neigh_coords_zx[opt_ind_zx], neigh_states_zx[opt_ind_zx] + D[zi, xi, 0] = costs_zx[opt_ind_zx] + + # DP 1: coming to z- + neigh_states_z_ = left_pos_neigh_states + neigh_coords_z_ = left_pos_neigh_coords + neigh_costs_z_ = left_pos_neigh_costs + costs_z_ = np.array(neigh_costs_z_) + x_drop_costs[x_cost_ind] + opt_ind_z_ = np.argmin(costs_z_) + P[zi, xi, 1] =
*neigh_coords_z_[opt_ind_z_], neigh_states_z_[opt_ind_z_] + D[zi, xi, 1] = costs_z_[opt_ind_z_] + + # DP 2: coming to -x + neigh_states__x = top_pos_neigh_states + neigh_coords__x = top_pos_neigh_coords + neigh_costs__x = top_pos_neigh_costs + costs__x = np.array(neigh_costs__x) + z_drop_costs[z_cost_ind] + opt_ind__x = np.argmin(costs__x) + P[zi, xi, 2] = *neigh_coords__x[opt_ind__x], neigh_states__x[opt_ind__x] + D[zi, xi, 2] = costs__x[opt_ind__x] + + # DP 3: coming to -- + neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states) + # neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states + diag_neigh_states) + # adding negative left and top neighbors + neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords) + # neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords + diag_neigh_coords) + costs___ = np.concatenate( + [ + left_neg_neigh_costs + x_drop_costs[x_cost_ind], + top_neg_neigh_costs + z_drop_costs[z_cost_ind], + # diag_neigh_costs + z_drop_costs[z_cost_ind] + x_drop_costs[x_cost_ind], + ], + 0, + ) + + opt_ind___ = costs___.argmin() + P[zi, xi, 3] = *neigh_coords___[opt_ind___], neigh_states___[opt_ind___] + D[zi, xi, 3] = costs___[opt_ind___] + + cur_state = D[K, N, :].argmin() + min_cost = D[K, N, cur_state] + + # unroll path + path = [] + zi, xi = K, N + x_dropped = [N] if cur_state in [1, 3] else [] + z_dropped = [K] if cur_state in [2, 3] else [] + while not (zi == 0 and xi == 0): + path.append((zi, xi)) + zi_prev, xi_prev, prev_state = P[zi, xi, cur_state] + if prev_state in [1, 3]: + x_dropped.append(xi_prev) + if prev_state in [2, 3]: + z_dropped.append(zi_prev) + zi, xi, cur_state = zi_prev, xi_prev, prev_state + + if return_labels: + labels = np.zeros(N) + for zi, xi in path: + if zi not in z_dropped and xi not in x_dropped: + labels[xi - 1] = zi + return labels + else: + return min_cost, path, x_dropped, z_dropped + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_left_pos, coord_left_neg = coord_left[..., [0, 1], :], coord_left[..., [2, 3], :] + coord_up_pos, coord_up_neg = coord_up[..., [0, 2], :], coord_up[..., [1, 3], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag] + coordinates_zx = [coord_diag] + if one_to_many: + neighbors_zx.append(neigh_left_pos[..., [0]] if contiguous else neigh_left) + coordinates_zx.append(coord_left_pos[..., [0], :] if contiguous else coord_left) + if many_to_one: + neighbors_zx.append(neigh_up_pos) + coordinates_zx.append(coord_up_pos) + diag_zx = list_min(neighbors_zx) + match_costs_diag + path_zx = list_min(coordinates_zx, keys=neighbors_zx) + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + coordinates_z_ = [coord_left_pos] + diag_z_ = list_min(neighbors_z_) + x_drop_costs_diag + path_z_ = list_min(coordinates_z_, keys=neighbors_z_) + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + coordinates__x = [coord_up_pos] + diag__x = list_min(neighbors__x) + z_drop_costs_diag + path__x = list_min(coordinates__x, keys=neighbors__x) + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], 
neigh_up_neg + z_drop_costs_diag[..., None]] + coordinates___ = [coord_left_neg, coord_up_neg] + diag___ = list_min(neighbors___) + path___ = list_min(coordinates___, neighbors___) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + path = torch.stack([path_zx, path_z_, path__x, path___], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 1], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = x1_dropcost[..., None] + diag_p_col = z1_dropcost[..., None] + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 1, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 1, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 1, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 1, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + + coord_up, coord_left, coord_diag = ( + coord_p[:, :-1].clone(), + coord_p[:, 1:].clone(), + coord_pp[:, pp_start : (pp_start + size)].clone(), + ) + # assign the right state to coordinates + coord_diag[..., 2] = 0 + coord_left[..., 2] = 1 + coord_up[..., 2] = 2 + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors = [ + neigh_diag + match_costs_diag[..., None], + neigh_left + x_drop_costs_diag[..., None], + neigh_up + z_drop_costs_diag[..., None], + ] + coordinates = [coord_diag, coord_left, coord_up] + diag = list_min(neighbors)[..., None] + path = (list_min(coordinates, keys=neighbors))[..., None, :] + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = x_drop_cost[..., None] + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(1) * (effective_d - 1), torch.zeros(1), torch.ones(1) * 1], dim=-1 + ) # [1, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 1, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = 
z_drop_cost[..., None] + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(1) * (effective_d - 1), torch.ones(1) * (last_r_p - 1), torch.ones(1) * 2], + dim=-1, + ) # [1, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 1, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 1, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + dc, rc, _ = coord_p[b, r][0] + tracebacks[orig_b.item()] = nw_diag_traceback(dc, rc, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_drop_dtw_machine(zx_costs_list, x_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. 
The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + x1_dropcost = all_cum_x_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 2, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 2, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 2, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 2, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_up_pos, coord_left_pos = coord_up[..., [0], :], coord_left[..., [0], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + pos_neighbors = [neigh_diag] + pos_coordinates = [coord_diag] + if one_to_many: + pos_neighbors.append(neigh_left_pos if contiguous else neigh_left) + pos_coordinates.append(coord_left_pos if contiguous else coord_left) + if many_to_one: + pos_neighbors.append(neigh_up) + pos_coordinates.append(coord_up) + diag_pos = list_min(pos_neighbors) + match_costs_diag + path_pos = list_min(pos_coordinates, keys=pos_neighbors) + + neg_neighbors = [neigh_left] + neg_coordinates = [coord_left] + diag_neg = list_min(neg_neighbors) + x_drop_costs_diag + path_neg = list_min(neg_coordinates, keys=neg_neighbors) + + diag = torch.stack([diag_pos, diag_neg], -1) + path = torch.stack([path_pos, path_neg], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(2) 
* (effective_d - 1), torch.zeros(2), torch.arange(2)], dim=-1 + ) # [2, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 2, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + pad = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(2) * (effective_d - 1), torch.ones(2) * (last_r_p - 1), torch.arange(2)], + dim=-1, + ) # [2, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 2, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_cum_x_drop_costs, batch_inf = [ + t[~mask] for t in [all_x_drop_costs, all_cum_x_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def fast_batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + + # create routing masks for selection + # 4x3 corresponds to 4 states (zx, z-, -x, --) and 3 neighbors (l, d, u) + zx_mask = torch.zeros((4, 3)) + zx_mask[:, 1] = 1 + if one_to_many: + zx_mask[0, 0] = 1 + if not contiguous: + zx_mask[1, 0] = 1 + if many_to_one: + zx_mask[[0, 2], 2] = 1 + + z__mask = torch.zeros((4, 3)) + z__mask[[0, 1], 0] = 1 + + _x_mask = torch.zeros((4, 3)) + _x_mask[[0, 2], 2] = 1 + + ___mask = torch.zeros((4, 3)) + ___mask[[2, 3], 0] = 1 + ___mask[[1, 3], 2] = 1 + + mask = torch.stack([zx_mask, z__mask, _x_mask, ___mask], dim=-1).to(dev).to(dtype) # [4, 3, 4] + + def transition( + neigh_left, neigh_diag, neigh_up, coord_left, coord_diag, coord_up, match_costs, x_drop_costs, z_drop_costs + ): + all_neigh = torch.stack([neigh_left, neigh_diag, neigh_up], dim=-1) # [B, d, 4, 3] + all_coords = torch.stack([coord_left, coord_diag, coord_up], dim=-1).permute( + [0, 1, 3, 2, 4] + ) # [B, d, 3, 4, 3], the first 3 is the spatial dimension of coordinates + additions_zx = match_costs[..., None].repeat([1, 1, 3]) # [B, d, 3] + additions_z_ = x_drop_costs[..., None].repeat([1, 1, 3]) + additions__x = z_drop_costs[..., None].repeat([1, 1, 3]) + additions___ = torch.stack([x_drop_costs, match_costs, z_drop_costs], dim=-1) + additions = torch.stack([additions_zx, additions_z_, additions__x, additions___], dim=-1) # [B, d, 3, 4] + + inverse_mask = (~(mask[None, None, ...].to(bool))).to(dtype) + filtered_costs = all_neigh[..., None] * mask[None, None, ...] + inverse_mask * inf[0] # [B, d, 4, 3, 4] + full_costs = filtered_costs + additions[:, :, None, :, :] * mask[None, None, ...] 
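# The two masked sums above enumerate all (source state, neighbor) routes at
# once: `mask[s_from, neighbor, s_to]` flags which routes may feed each target
# state, `inverse_mask` pushes forbidden routes to +inf, and `additions`
# contributes the per-route step cost (match, x-drop, or z-drop). The min over
# the flattened 4x3 route axis below then selects, for every diagonal cell and
# every target state, the cheapest valid predecessor.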
+ B, d = full_costs.shape[:2] + the_min = full_costs.reshape([B, d, -1, 4]).min(dim=2) + new_diag = the_min.values + + all_coords = all_coords[..., None].repeat([1, 1, 1, 1, 1, 4]).reshape([B, d, 3, -1, 4]) + argmins = the_min.indices[:, :, None, None, :].repeat([1, 1, 3, 1, 1]) + pointers = torch.gather(all_coords, index=argmins, dim=-2) + pointers = pointers[:, :, :, 0, :].permute([0, 1, 3, 2]) + return new_diag, pointers + + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + diag, path = transition( + neigh_left, + neigh_diag, + neigh_up, + coord_left, + coord_diag, + coord_up, + match_costs_diag, + x_drop_costs_diag, + z_drop_costs_diag, + ) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = 
torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + local_mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(local_mask, as_tuple=False)[:, 0], Rs[local_mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~local_mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~local_mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~local_mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +if __name__ == '__main__': + zx_costs = np.random.rand(3, 4) # K=3 steps, N=4 clips + # zx_costs = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 1.0]]) + drop_costs = np.random.rand(4) + align = drop_dtw(zx_costs, drop_costs) + #breakpoint() diff --git a/yc2_univl/backup/pdvc/dp/soft_dp.py b/yc2_univl/backup/pdvc/dp/soft_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..9d5c17e5d5eeff50254dc7b8d31f6d43b253e388 --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/soft_dp.py @@ -0,0 +1,617 @@ +import numpy as np +import torch +import math +from torch import log, exp +import torch.nn.functional as F +from copy import copy + +from pdvc.dp.dp_utils import VarTable, minGamma, minProb, pad_costs, prob_min, unique_softmax, cosine_sim + + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def softDTW( + step_features, + frame_features, + labels, + dist_type="inner", + softning="prob", + gamma_min=0.1, + gamma_xz=0.1, + step_normalize=True, +): + """function to obtain a soft (differentiable) version of DTW + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the function + _min_fn = minProb if softning == "prob" else minGamma + min_fn = lambda x: _min_fn(x, gamma=gamma_min) + + # first get a pairwise distance matrix + if 
dist_type == "inner": + dist = step_features @ frame_features.T + else: + dist = cosine_sim(step_features, frame_features) + if step_normalize: + if labels is not None: + norm_dist = unique_softmax(dist, labels, gamma_xz) + else: + norm_dist = torch.softmax(dist / gamma_xz, 0) + dist = -log(norm_dist) + + # initialize soft-DTW table + nrows, ncols = dist.shape + # sdtw = torch.zeros((nrows+1,ncols+1)).to(torch.float).to(device) + sdtw = VarTable((nrows + 1, ncols + 1)) + for i in range(1, nrows + 1): + sdtw[i, 0] = 9999999999 + for j in range(1, ncols + 1): + sdtw[0, j] = 9999999999 + + # obtain dtw table using min_gamma or softMin relaxation + for i in range(1, nrows + 1): + for j in range(1, ncols + 1): + neighbors = torch.stack([sdtw[i, j - 1], sdtw[i - 1, j - 1], sdtw[i - 1, j]]) + di, dj = i - 1, j - 1 # in the distance matrix indices are shifted by one + new_val = dist[di, dj] + min_fn(neighbors) + sdtw[i, j] = torch.squeeze(new_val, 0) + sdtw_loss = sdtw[nrows, ncols] / step_features.shape[0] + return sdtw_loss, sdtw, dist + + +def dropDTW(zx_costs, drop_costs, softning="prob", exclusive=True, contiguous=True, gamma_min=1): + """function to obtain a soft (differentiable) version of Drop-DTW + zx_costs: matching costs of size K*N (K: number of steps, N: number of video frames) + drop_costs: costs of dropping each of the N frames + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + K, N = zx_costs.shape + exclusive = exclusive if K <= N else False + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3)) # a (K+1) x (N+1) table with 3 channels per cell: [final, drop, match] + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + cum_drop_costs[xi - 1] + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexing into costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e.
dropping xi + Dm = d_left + drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_cost = D[K, N][0] + return min_cost, D + + +def batch_dropDTW( + zx_costs_list, drop_costs_list, softning="prob", exclusive=True, contiguous=True, drop_mode="DropDTW", gamma_min=1 +): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + + # pre-processing + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # preparing padded tables + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], [] + for i in range(B): + zx_costs = zx_costs_list[i] + drop_costs = drop_costs_list[i] + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # padding everything to the size of the largest N and K + row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device) + padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad])) + padded_drop_costs.append(torch.cat([drop_costs, row_pad])) + multirow_pad = torch.stack([row_pad + inf] * Ks[i], dim=0) + padded_table = torch.cat([zx_costs, multirow_pad], dim=1) + rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + inf + padded_table = torch.cat([padded_table, rest_pad], dim=0) + padded_zx_costs.append(padded_table) + + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3, B)) # This corresponds to B 3-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + if drop_mode == "DropDTW": + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + elif drop_mode == "OTAM": + D[0, xi] = torch.zeros_like(D[0, xi]) + else: # drop_mode == 'DTW' + D[0, xi] = torch.zeros_like(D[0, xi]) + inf + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + if drop_mode == "DropDTW": + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e. 
dropping xi + Dm = d_left + all_drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + else: + d_right = D[zi - 1, xi][0:1] + D_final = Dm = Dp = ( + min_fn([d_diag, d_left, d_right], gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + ) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + Ni, Ki = Ns[i], Ks[i] + min_cost_i = D[Ki, Ni][0, i] + min_costs.append(min_cost_i / Ni) + + return min_costs, D + + +def batch_double_dropDTW(zx_costs_list, drop_costs_list, gamma_min=1): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + min_fn = lambda x: minProb(x, gamma=gamma_min) + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + + # assuming sequences are the same length + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 4, B), dtype, dev) # This corresponds to B 4-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + all_cum_drop_costs[(zi - 1) : zi] + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1, 2, 3] # zx, z-, -x, -- + diag_neigh_costs = [D[zi - 1, xi - 1][s] for s in diag_neigh_states] + + left_neigh_states = [0, 1] # zx and z- + left_neigh_costs = [D[zi, xi - 1][s] for s in left_neigh_states] + + upper_neigh_states = [0, 2] # zx and -x + upper_neigh_costs = [D[zi - 1, xi][s] for s in upper_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + # DP 0: coming to zx + neigh_costs_zx = diag_neigh_costs + upper_neigh_costs + left_neigh_costs + D0 = min_fn(neigh_costs_zx) + all_zx_costs[z_cost_ind, x_cost_ind] + + # DP 1: coming to z- + neigh_costs_z_ = left_neigh_costs + D1 = min_fn(neigh_costs_z_) + all_drop_costs[x_cost_ind] + + # DP 2: coming to -x + neigh_costs__x = upper_neigh_costs + D2 = min_fn(neigh_costs__x) + all_drop_costs[z_cost_ind] + + # DP 3: coming to -- + costs___ = [d + all_drop_costs[z_cost_ind] * 2 for d in diag_neigh_costs] + [ + D[zi, xi - 1][3] + all_drop_costs[x_cost_ind], + D[zi - 1, xi][3] + all_drop_costs[z_cost_ind], + ] + D3 = min_fn(costs___) + + D[zi, xi] = torch.cat([D0, D1, D2, D3], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + min_cost_i = min_fn(D[K, N][:, i]) + min_costs.append(min_cost_i / N) + return min_costs, D + + +def drop_dtw_machine(zx_costs, drop_costs, gamma_min=1, exclusive=True, contiguous=True): + K, N = zx_costs.shape + dev = zx_costs.device + flipped_costs = torch.flip(zx_costs, [0]) # flip the cost matrix upside down + cum_drop_costs = torch.cumsum(drop_costs, dim=-1) + + # initialize first two contr diagonals + inf = torch.tensor([9999999999], device=dev, dtype=zx_costs.dtype) + diag_pp = torch.zeros([1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([1, 2], 
device=dev) * inf + diag_p_row = torch.stack([inf, cum_drop_costs[[0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 0) # diag at i-1 + + for i in range(K + N - 1): + size = diag_p.size(0) - 1 + pp_start = max(0, diag_pp.size(0) - diag_p.size(0)) + neigh_up, neigh_left, neigh_diag = diag_p[:-1], diag_p[1:], diag_pp[pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[:, [0]], neigh_left[:, [0]] + + # define match and drop cost vectors + match_costs_diag = torch.flip(torch.diag(flipped_costs, i + 1 - K), [-1]) + d_start, d_end = max(1 - K + i, 0), min(i, N - 1) + 1 + drop_costs_diag = torch.flip(drop_costs[d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + if not exclusive: + pos_neighbors.append(neigh_up_pos) + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if i < N - 1: + # fill in 0th row with [drop_cost, inf] + pad = torch.stack([inf, cum_drop_costs[[i + 1]]], -1) + diag = torch.cat([pad, diag]) + if i < K - 1: + # fill in 0th col with [inf, inf] + pad = torch.stack([inf, inf], -1) + diag = torch.cat([diag, pad]) + + diag_pp = diag_p + diag_p = diag + assert (diag.size(0) == 1) and (diag.size(1) == 2), f"Last diag shape is {diag.shape} instead of [1, 2]" + + cost = prob_min(diag, gamma_min) + return cost + + +def batch_drop_dtw_machine(zx_costs_list, drop_costs_list, gamma_min=1, exclusive=True, contiguous=True): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + # For samples where K > N, exclusive computation is not possible + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + persample_exclusive = torch.tensor([Ni >= Ki for Ki, Ni in shapes]).to(dev) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # define costs in tensors + all_zx_costs = [F.pad(c, [0, N - c.shape[1], 0, K - c.shape[0]]) for c in zx_costs_list] + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0) + all_cum_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. Here, 0 is keep, 1 is drop. 
+ """ + # initialize first two contr diagonals + batch_inf, batch_ones = torch.stack([inf] * B, 0), torch.ones([B, 1], device=dev, dtype=dtype) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([B, 1, 2], device=dev) * batch_inf[..., None] + diag_p_row = torch.stack([batch_inf, all_cum_drop_costs[:, [0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The pathlength path is also a diagonal representation that carries the optimal pathlength to each point + with torch.no_grad(): + path_pp = torch.zeros([B, 1, 2], device=dev, dtype=dtype) + path_p = torch.ones([B, 2, 2], device=dev, dtype=dtype) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) + path_lens = torch.zeros(B).to(dtype=dtype).to(device=dev) + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + neigh_path_up, neigh_path_left, neigh_path_diag = ( + path_p[:, :-1], + path_p[:, 1:], + path_pp[:, pp_start : (pp_start + size)], + ) + neigh_path_up_pos, neigh_path_left_pos = neigh_path_up[..., [0]], neigh_path_left[..., [0]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + d_start, d_end = max(1 - K + d, 0), min(d, N - 1) + 1 + drop_costs_diag = torch.flip(all_drop_costs[:, d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + pos_path_neighbors = ( + [neigh_path_diag, neigh_path_left_pos] if contiguous else [neigh_path_diag, neigh_path_left] + ) + if exclusive and (~persample_exclusive).any(): + # apply non-exclusive rule for some batch elements, via masing out the exclusive elements with inf + masked_neigh_up_pos = neigh_up_pos + persample_exclusive[:, None, None] * batch_inf[:, None] + pos_neighbors.append(masked_neigh_up_pos) + + pos_path_neighbors.append(neigh_path_up_pos * (~persample_exclusive[:, None, None])) + elif not exclusive: + # apply standard non-exclusive rule to all batch elements + pos_neighbors.append(neigh_up_pos) + pos_path_neighbors.append(neigh_path_up_pos) + + # DP Table update + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # Path Table Update + with torch.no_grad(): + path_pos = prob_min(pos_path_neighbors, gamma_min, pos_neighbors) + 1 + path_neg = prob_min([neigh_path_left], gamma_min, [neigh_left]) + 1 + path = torch.stack([path_pos, path_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in DP table's 0th row with [drop_cost, inf] + pad_d = torch.stack([batch_inf, all_cum_drop_costs[:, [d + 1]]], -1) + diag = torch.cat([pad_d, diag], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = torch.stack([batch_inf, torch.zeros_like(batch_inf) + d], -1) + path = torch.cat([pad_p, path], 1) + + if d < K - 1: + # fill in DP table's 0th col with [inf, inf] + pad_d = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad_d], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = pad_d + path = torch.cat([path, pad_p], 1) + + diag_pp = diag_p + diag_p 
= diag + + path_pp = path_p + path_p = path + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + path_lens[orig_mask] = path_lens[orig_mask] + prob_min([path[bs, rs]], gamma_min, [diag[bs, rs]]) + + diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs] + ] + all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive = [ + t[~mask] for t in [all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive] + ] + if torch.numel(Ds) == 0: + break + + # costs = prob_min([diag], gamma_min) + costs_norm = min_costs / path_lens + return min_costs, path_lens + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, gamma_min=1, exclusive=True, contiguous=True +): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + Ns, Ks = [], [] + for i in range(B): + Ki, Ni = zx_costs_list[i].shape + if exclusive and Ki >= Ni: + # in case the number of steps is greater than the number of frames, + # duplicate every frame and let the drops do the job. + mult = math.ceil(Ki / Ni) + zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1]) + x_drop_costs_list[i] = torch.stack([x_drop_costs_list[i]] * mult, dim=-1).reshape([-1]) + Ni *= mult + Ns.append(Ni) + Ks.append(Ki) + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag, neigh_left_pos[..., [0]]] if contiguous else [neigh_diag, neigh_left_pos] + if not exclusive: + neighbors_zx.append(neigh_up_pos) + diag_zx = prob_min(neighbors_zx, gamma_min) + match_costs_diag + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + diag_z_ = prob_min(neighbors_z_, gamma_min) + x_drop_costs_diag + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + diag__x = prob_min(neighbors__x, gamma_min) + z_drop_costs_diag + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], neigh_up_neg + z_drop_costs_diag[..., None]] + diag___ = prob_min(neighbors___, gamma_min) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in 0th row with [drop_cost, inf] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([pad, diag], 1) + if d < K - 1: + # fill in 0th col with [inf, inf] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], 1) + + diag_pp = diag_p + diag_p = diag + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + + # filtering out already processed elements + diag, diag_p, diag_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs = [ + t[~mask] for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs] + ] + + if torch.numel(Ds) == 0: + break + + costs_norm = min_costs / torch.tensor(Ns).to(dev) + return costs_norm + + 
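For reference, the recurrence that both the table-based `dropDTW` above and the diagonal "machine" variants implement can be written as a plain O(K*N) double loop. The sketch below is not part of the repo: the function name `drop_dtw_reference` is mine, and it assumes the exclusive, contiguous setting (each frame matches at most one step, and a step's matched frames are consecutive), with a hard `min` in place of the soft `prob_min`/`minGamma` relaxation.

import numpy as np

def drop_dtw_reference(zx_costs, drop_costs):
    # zx_costs: [K, N] step-to-frame match costs; drop_costs: [N] per-frame drop costs
    K, N = zx_costs.shape
    inf = 1e10
    Dp = np.full((K + 1, N + 1), inf)  # best cost of a path ending in a match at (z, x)
    Dm = np.full((K + 1, N + 1), inf)  # best cost of a path ending in a drop of frame x
    D = np.full((K + 1, N + 1), inf)   # overall best cost
    D[0, 0] = 0.0
    D[0, 1:] = Dm[0, 1:] = np.cumsum(drop_costs)  # a prefix of dropped frames
    for z in range(1, K + 1):
        for x in range(1, N + 1):
            # match step z to frame x: the previous frame was either matched to
            # step z-1 (diagonal move) or to the same step z (contiguous one-to-many)
            Dp[z, x] = min(D[z - 1, x - 1], Dp[z, x - 1]) + zx_costs[z - 1, x - 1]
            # drop frame x, regardless of what happened before it
            Dm[z, x] = D[z, x - 1] + drop_costs[x - 1]
            D[z, x] = min(Dp[z, x], Dm[z, x])
    return D[K, N]

# e.g. drop_dtw_reference(np.random.rand(3, 8), np.full(8, 0.4))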
+if __name__ == "__main__": + from exact_dp import double_drop_dtw + + K, N = 7, 15 + zx_costs = torch.normal(torch.ones([K, N])) + x_drop_costs = zx_costs.mean(0) + z_drop_costs = zx_costs.mean(1) + + min_cost, *_ = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy()) + my_costs = batch_double_drop_dtw_machine([zx_costs], [x_drop_costs], [z_drop_costs], gamma_min=0) + print(my_costs * N, min_cost) diff --git a/yc2_univl/backup/pdvc/dp/visualization.py b/yc2_univl/backup/pdvc/dp/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..fed631a8979532253408fb402672eee0cc4a9a64 --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/visualization.py @@ -0,0 +1,179 @@ +import io +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.pyplot import figure +from PIL import Image + + +# defining the colors and shapes +color_code = [ + "blue", + "orange", + "green", + "red", + "purple", + "brown", + "pink", + "grey", + "olive", + "cyan", + "lime", + "grey", + "firebrick", + "coral", + "chocolate", + "saddlebrown", + "bisque", + "goldenrod", + "gold", + "khaki", + "darkolivegreen", + "greenyellow", + "palegreen", + "springgreen", + "aquamarine", + "teal", + "deepskyblue", + "navy", + "mediumslateblue", + "royalblue", + "indigo", + "magenta", + "deeppink", + "crimson", + "violet", + "snow", + "lightgrey", + "wheat", + "dodgerblue", + "darkseagreen", +] +color_code = color_code * 10 +shape_code = ["o", "s", "P", "*", "h", ">", "X", "d", "D", "v", "<", "p"] +shape_code = shape_code * int(len(color_code) / len(shape_code) + 1) + +color_values = [] +for color in color_code: + _ = plt.fill([0, 0, 1, 1, 0], [0, 1, 1, 0, 0], color) + buf = io.BytesIO() + _ = plt.savefig(buf, format="png") + _ = plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + color_values.append(img[100, 300]) + +color_code_hex = [] +for color_value in color_values: + step_color_rgb = tuple([s.item() for s in color_value]) + color_code_hex.append("#%02x%02x%02x" % step_color_rgb) + + +def plot_alignment( + step_ids, frame_labels, step_colors, step_shapes, size=(15, 2), name="all_step_to_video", to_np=True, grid_on=True +): + N_steps = len(frame_labels) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + tick_freq = 50 if N_steps > 1500 else 20 + _ = plt.xticks(np.arange(0, N_steps, tick_freq)) + _ = plt.xlim(0, N_steps) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + + if grid_on: + _ = plt.grid() + else: + plt.plot(np.arange(len(frame_labels)), [1] * len(frame_labels), color="grey") + + for si, step_id in enumerate(step_ids): + time, val = [], [] + for i in range(N_steps): + if si + 1 == frame_labels[i]: + time.append(i) + val.append(1) + time, val = np.array(time), np.array(val) + _ = plt.plot(time, val, step_shapes[step_id], color=step_colors[step_id]) + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_step_to_video_alignment(corresp_mat, size=(15, 2)): + """corresp_mat is of shape [K, N], where K is num_steps, and N is video_len""" + step_ids = np.arange(corresp_mat.size(0)) + 1 + labels = corresp_mat.to(float).argmax(0) + 1 * corresp_mat.to(bool).any(0) + + K_present = corresp_mat.to(bool).any(1).to(int).sum().item() + name = f"Video Segmentation | {K_present} steps present" + return plot_alignment(step_ids, 
labels, color_code, shape_code, name=name, size=size) + + +def plot_similarities( + sim, + drop_line=None, + colors=None, + select=None, + color_offset=0, + do_legend=True, + name="", + size=(15, 2), + grid_on=True, + to_np=True, + linewidth=1, +): + colors = colors if colors is not None else color_code + K, N = sim.shape + select = select if select is not None else np.arange(K) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + _ = plt.xticks(np.arange(0, N, 20)) + _ = plt.xlim(0, N) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + if grid_on: + _ = plt.grid() + + for i in range(K): + if i in select: + _ = plt.plot(np.arange(N), sim[i], color=colors[i + color_offset], label=str(i), linewidth=linewidth) + + if drop_line is not None: + _ = plt.plot(np.arange(N), drop_line * np.ones(N), "--") + + if do_legend: + _ = plt.xlim(0, N + int(0.10 * N)) + plt.legend() + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_gt_seg(N, starts, ends, colors=None, shapes=None, name="GT Seg", clip_len=1, size=(15, 2), grid_on=True): + colors = colors if colors is not None else color_code + shapes = shapes if shapes is not None else shape_code + + K = len(starts) + labels = -np.ones(N) + for i in range(K): + s, e = int(starts[i]), int(ends[i]) + labels[s : e + 1] = i + step_ids = np.arange(K) + return plot_alignment(step_ids, labels, colors, shapes, to_np=False, name=name, size=size, grid_on=grid_on) diff --git a/yc2_univl/backup/pdvc/matcher.py b/yc2_univl/backup/pdvc/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..3311680756df6cf1efeed2bbe2ab55350525b4ce --- /dev/null +++ b/yc2_univl/backup/pdvc/matcher.py @@ -0,0 +1,446 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +from scipy.optimize import linear_sum_assignment +from torch import nn +import torch.nn.functional as F +from torch import log, exp +import numpy as np + +from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from pdvc.dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from pdvc.dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from pdvc.dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class HungarianMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + + For efficiency reasons, the targets don't include the no_object. 
Because of this, in general, + there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, + while the others are un-matched (and thus treated as non-objects). + """ + + def __init__(self, + cost_class: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0 # or cost_caption!=0, "all costs cant be 0" + # breakpoint() + + def forward(self, outputs, targets, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + # Also concat the target labels and boxes + tgt_ids = torch.cat([v["labels"] for v in targets]) + if self.use_pseudo_box and self.training: + # print('use pseudo box') + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + else: + tgt_bbox = torch.cat([v["boxes"] for v in targets]) + # print('use gt box') + + # Compute the classification cost. 
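# The cost below is the focal-loss-style classification cost from Deformable
# DETR's matcher: for each (prediction, target) pair it is the positive focal
# term minus the negative focal term, evaluated at the predicted probability
# of the target's class, so confident correct predictions receive a large
# negative (attractive) matching cost.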
+ # alpha = 0.25 + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + # breakpoint() + + # Compute the giou cost betwen boxes + try: + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + except: + print('out_bbox', out_bbox) + print('tgt_bbox', tgt_bbox) + breakpoint() + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix + # breakpoint() + try: # [100, 10], [100, 11], [100, 10] + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + except: + breakpoint() + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2]} + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + # print(self.cost_caption, cost_caption.var(dim=0), cost_caption.max(dim=0)[0] - cost_caption.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + + sizes = [len(v["boxes_pseudo"]) for v in targets] if self.use_pseudo_box else [len(v["boxes"]) for v in targets] + # pdb.set_trace() + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]]*m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j%sizes[ii], dtype=torch.int64)) for ii,(i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + if verbose: + print('------matching results:') + print(indices) + for indice in indices: + for i, j in zip(*indice): + print(out_bbox[i][0::2], tgt_bbox[j][0::2]) + print('-----topK scores:') + topk_indices = out_prob.topk(10, dim=0) + print(topk_indices) + for i,(v,ids) in enumerate(zip(*topk_indices)): + print('top {}'.format(i)) + s= '' + for name,cost in costs.items(): + s += name + ':{} '.format(cost[ids]) + print(s) + + return indices, rl_indices + +class DTWMatcher(nn.Module): + ''' + Drop_z: if True, then we drop both the x axis (query) and z axis (text) + One_to_many: multiple x match to one z + Many_to_one: multiple z match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=True, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, ouputs, targets, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed[0].device + # embarisingly, this is faster on CPU than on GPU! 
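# Consequently, the alignment below runs on CPU tensors; only the resulting
# correspondence matrices are moved back to `orig_device` at the end.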
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + # TODO: Add the classification cost the the alignment cost + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, [] + +class SimMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + based on the similarity bewteen text embedding and query embedding + """ + def __init__(self, + cost_class: float = 1, + cost_sim: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_sim = cost_sim + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + 
self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_sim!=0, "all costs cannot be 0" + # breakpoint() + + def forward(self, outputs, targets, text_embed, event_embed, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + tgt_ids = torch.cat([v["labels"] for v in targets]) + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Also concat the target labels and boxes + # breakpoint() + if self.use_pseudo_box: + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + + # Compute the giou cost betwen boxes + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + else: + cost_bbox = torch.zeros_like(cost_class) + cost_giou = torch.zeros_like(cost_class) + + # Compute the classification cost. 
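# NOTE: alpha, gamma and cost_class were already computed above with the same
# values; the recomputation below is redundant but harmless.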
+ # alpha = 0.25 + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + # breakpoint() + # Compute the similarity cost + cost_sim = compute_sim(text_embed, event_embed, l2_norm=True).permute(1,0) + cost_sim = torch.ones_like(cost_sim) - cost_sim + # breakpoint() + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + self.cost_sim * cost_sim + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + 'cost_sim': cost_sim, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2], + } + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + print(self.cost_sim, cost_sim.var(dim=0), cost_sim.max(dim=0)[0] - cost_sim.min(dim=0)[0]) + # print(self.cost_caption, cost_caption.var(dim=0), cost_caption.max(dim=0)[0] - cost_caption.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + sizes = [text_embed.size(0)] + # pdb.set_trace() + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]]*m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j%sizes[ii], dtype=torch.int64)) for ii,(i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + + return indices, rl_indices + +def build_matcher(args): + if args.matcher_type == 'DTW': + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + elif args.matcher_type == 'Sim': + return SimMatcher(cost_class=args.set_cost_class, + cost_sim=args.set_cost_sim, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + else: + return HungarianMatcher(cost_class=args.set_cost_class, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + + +def build_matcher_simple(): + #return DTWMatcher(keep_percentile=0.5) + return SimMatcher() + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/matcher_align.py b/yc2_univl/backup/pdvc/matcher_align.py new file mode 100644 index 0000000000000000000000000000000000000000..e9b93dce7e9ff252230fbb8f8bc2861ce3a16605 --- /dev/null +++ b/yc2_univl/backup/pdvc/matcher_align.py @@ -0,0 +1,154 @@ +# 
------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +import torch.nn.functional as F +from torch import log, exp +import numpy as np +from torch import nn +from scipy.optimize import linear_sum_assignment +# from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class DTWMatcher(nn.Module): + ''' + Drop_z: if True, then we drop both the x axis (query) and z axis (text) + One_to_many: multiple x match to one z + Many_to_one: multiple z match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=False, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed.device + # embarisingly, this is faster on CPU than on GPU! 
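# As in pdvc/matcher.py, the DP alignment below runs on CPU copies of the
# similarity matrix; results are moved back to the original device afterwards.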
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, _ + +def build_matcher(args): + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + + +def build_matcher_simple(): + return DTWMatcher(keep_percentile=0.5) + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() diff --git a/yc2_univl/backup/pdvc/modules/UniVL_mini.py b/yc2_univl/backup/pdvc/modules/UniVL_mini.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9d6e960cc742b2eed92827f568734ae91073ce --- /dev/null +++ 
b/yc2_univl/backup/pdvc/modules/UniVL_mini.py @@ -0,0 +1,1292 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import copy +import math +import logging +import collections +import unicodedata +import os +from urllib.parse import urlparse +from typing import Optional, Tuple, Union, IO, Callable, Set +from pathlib import Path +import shutil +import tarfile +import tempfile +import json +from hashlib import sha256 +from functools import wraps +import boto3 +from botocore.exceptions import ClientError +import requests +from tqdm import tqdm + + +import torch +from torch import nn + + +logger = logging.getLogger(__name__) + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz", +} + +CONFIG_NAME = 'bert_config.json' +WEIGHTS_NAME = 'pytorch_model.bin' + +PRETRAINED_VOCAB_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", +} +PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = { + 'bert-base-uncased': 512, + 'bert-large-uncased': 512, + 'bert-base-cased': 512, + 'bert-large-cased': 512, + 'bert-base-multilingual-uncased': 512, + 'bert-base-multilingual-cased': 512, + 'bert-base-chinese': 512, +} +VOCAB_NAME = 'vocab.txt' + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with
open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. + """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. 
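+ # NamedTemporaryFile is deleted as soon as the handle closes, so the block + # below flushes and rewinds the handle, then copies the payload into the + # cache directory while the file is still open.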
+ with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting + wordpiece.""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")): + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + if token not in self.vocab: + ids.append(self.vocab["[UNK]"]) + logger.error("Cannot find token '{}' in vocab.
Using [UNK] instead".format(token)) + else: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + raise ValueError( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids into tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + @classmethod + def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + """ + Instantiate a BertTokenizer from a pre-trained vocabulary file. + Download and cache the pre-trained model file if needed. + """ + vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(vocab_file) is False: + if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP: + vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name] + else: + vocab_file = pretrained_model_name + if os.path.isdir(vocab_file): + vocab_file = os.path.join(vocab_file, VOCAB_NAME) + # redirect to the cache, if necessary + print(vocab_file) + try: + resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir) + except FileNotFoundError: + logger.error( + "Model name '{}' was not found. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated with this path or url.".format( + pretrained_model_name, + vocab_file)) + return None + if resolved_vocab_file == vocab_file: + logger.info("loading vocabulary file {}".format(vocab_file)) + else: + logger.info("loading vocabulary file {} from cache at {}".format( + vocab_file, resolved_vocab_file)) + if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP: + # if we're using a pretrained model, ensure the tokenizer won't index sequences longer + # than the number of positional embeddings + max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name] + kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len) + kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]") + + # Instantiate tokenizer. + tokenizer = cls(resolved_vocab_file, *inputs, **kwargs) + + return tokenizer + + def add_tokens(self, new_tokens, model): + """ + Add a list of new tokens to the tokenizer class. If the new tokens are not in the + vocabulary, they are added to it with indices starting from length of the current vocabulary. + Args: + new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assigns the index of the ``unk_token`` to them). + Returns: + Number of tokens added to the vocabulary. + Examples:: + # Let's see how to increase the vocabulary of Bert model and tokenizer + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) + print('We have added', num_added_toks, 'tokens') + model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expects to receive the full size of the new vocabulary, i.e. the length of the tokenizer.
+ """ + + to_add_tokens = [] + for token in new_tokens: + assert isinstance(token, str) + to_add_tokens.append(token) + # logger.info("Adding %s to the vocabulary", token) + + vocab = collections.OrderedDict() + for token in self.vocab.keys(): + vocab[token] = self.vocab[token] + for token in to_add_tokens: + vocab[token] = len(vocab) + self.vocab = self.wordpiece_tokenizer.vocab = vocab + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + + model.resize_token_embeddings(new_num_tokens=len(vocab)) + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. + """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `char` is a whitespace character.""" + # \t, \n, and \r are technically control characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `char` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `char` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False + + +def gelu(x): + """Implementation of the gelu activation function.
+ For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated with this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weights file doesn't exist.
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + +class BertConfig(PretrainedConfig): + """Configuration class to store the configuration of a `BertModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probability for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The stddev of the truncated_normal_initializer for + initializing all weight matrices.
+ """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int) " + "or the path to a pretrained model config file (str)") + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for downloading and loading pretrained models. + """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + # if not isinstance(config, PretrainedConfig): + # raise ValueError( + # "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + # "To create a model from a Google pretrained model use " + # "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + # self.__class__.__name__, self.__class__.__name__ + # )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in
module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). + """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Encoder Representations from Transformers"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary (see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + type indices selected in [0, 1].
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controlled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated with the first token of the + input (`[CLS]`) to train on the Next-Sentence task (see BERT's paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + def __init__(self, config): + super(BertModel, self).__init__(config) + self.config = config + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is simpler than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely.
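+ # Worked example: a mask of [1, 1, 0] becomes (1.0 - mask) * -10000.0 = + # [0, 0, -10000], so the padded position gets a vanishing probability after + # the softmax; a large negative constant is used instead of -inf so the + # arithmetic stays finite in fp16.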
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output + + +def build_UniVL_text_encoder(dict): + bert_config = BertConfig.from_dict(dict) + bert = BertModel(bert_config) + + return bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + + + +def load_pretrained_UniVL(args, device, n_gpu, local_rank, init_model=None): + + if init_model: + model_state_dict = torch.load(init_model, map_location='cpu') + else: + model_state_dict = None + + # Prepare model + cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + # NOTE: UniVL itself is provided by the full UniVL codebase; it is not defined in this mini module. + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + + return model + +if __name__ == '__main__': + bert_config_dict = { + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 + } + tokenizer = build_UniVL_tokenizer() + bert = build_UniVL_text_encoder(bert_config_dict) + words = ["[CLS]"] + ['you', 'love', 'you'] + ["[SEP]"] + input_ids = torch.LongTensor([tokenizer.convert_tokens_to_ids(words)]) # batch of one token sequence + attention_mask = torch.ones_like(input_ids) + #masked_tokens = words.copy() + #masked_token_ids = tokenizer.convert_tokens_to_ids(masked_tokens) + token_type_ids = None + breakpoint() + encoded_layers, _ = bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True) + sequence_output = encoded_layers[-1] + diff --git a/yc2_univl/backup/pdvc/modules/__init__.py b/yc2_univl/backup/pdvc/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d34e0a02cf990fffc878b695beee9637074e33d0 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..874dd9210e523da3f66f8b15054a96cadeee908f Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2030617dd39a30551bcda930768bb5af198af31 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc differ diff --git
a/yc2_univl/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3218f01ae734e108885fd322ff9db4dc73b204fe Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0585085b54395651e7fa6b8fb60d877f579733ce Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f39ca45e9cc3f91242f4e039001dc5f6f2636af Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93a08af0acd6f720e525203d381fe91f1bc3b33f Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6631deadc8c18f93e755eca7dd975b6ce83b6ca1 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5148122f4202468a675012274b2eead3a84a1510 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff40d3cabb4bad221fc02eb703a9b76971c01709 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64204d60a2c8da0639a86e05802791c7a65e4c17 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/beam.py b/yc2_univl/backup/pdvc/modules/beam.py new file mode 100644 index 0000000000000000000000000000000000000000..eff1d961ef393e03a3c9105022b1047f5ea7133d --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/beam.py @@ -0,0 +1,116 @@ +""" +Manage beam search info structure. +Heavily borrowed from OpenNMT-py. 
For code in OpenNMT-py, please check the following link (possibly an older version): +https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py +""" + +import torch + +class Constants(): + def __init__(self): + self.PAD = 0 + self.UNK = 1 + self.BOS = 2 + self.EOS = 3 + self.PAD_WORD = '[PAD]' + self.UNK_WORD = '[UNK]' + self.BOS_WORD = '[CLS]' + self.EOS_WORD = '[SEP]' + + @classmethod + def from_tokenizer(cls, tokenizer): + instance = cls() + instance.PAD = tokenizer.vocab[instance.PAD_WORD] + instance.UNK = tokenizer.vocab[instance.UNK_WORD] + instance.BOS = tokenizer.vocab[instance.BOS_WORD] + instance.EOS = tokenizer.vocab[instance.EOS_WORD] + return instance + +class Beam(): + ''' Beam search ''' + + def __init__(self, size, device=False, tokenizer=None): + if tokenizer is None: + self.constants = Constants() + else: + self.constants = Constants.from_tokenizer(tokenizer) + + self.size = size + self._done = False + # The score for each hypothesis on the beam. + self.scores = torch.zeros((size,), dtype=torch.float, device=device) + self.all_scores = [] + + # The backpointers at each time-step. + self.prev_ks = [] + + # The outputs at each time-step. + self.next_ys = [torch.full((size,), self.constants.BOS, dtype=torch.long, device=device)] + + def get_current_state(self): + "Get the outputs for the current timestep." + return self.get_tentative_hypothesis() + + def get_current_origin(self): + "Get the backpointers for the current timestep." + return self.prev_ks[-1] + + @property + def done(self): + return self._done + + def advance(self, word_prob, word_length=None): + + "Update beam status and check if finished or not." + num_words = word_prob.size(1) + # Sum the previous scores. + if len(self.prev_ks) > 0: + beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) + else: + beam_lk = word_prob[0] + flat_beam_lk = beam_lk.view(-1) + best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) # 1st sort + self.all_scores.append(self.scores) + self.scores = best_scores + # bestScoresId is flattened as a (beam x word) array, + # so we need to calculate which word and beam each score came from + prev_k = best_scores_id // num_words + self.prev_ks.append(prev_k) + self.next_ys.append(best_scores_id - prev_k * num_words) + # End condition is when top-of-beam is EOS. + if self.next_ys[-1][0].item() == self.constants.EOS: + self._done = True + + return self._done + + def sort_scores(self): + "Sort the scores." + return torch.sort(self.scores, 0, True) + + def get_the_best_score_and_idx(self): + "Get the score of the best in the beam." + scores, ids = self.sort_scores() + return scores[1], ids[1] + + def get_tentative_hypothesis(self): + "Get the decoded sequence for the current timestep." + + if len(self.next_ys) == 1: + dec_seq = self.next_ys[0].unsqueeze(1) + else: + _, keys = self.sort_scores() + hyps = [self.get_hypothesis(k) for k in keys] + hyps = [[self.constants.BOS] + h for h in hyps] + dec_seq = torch.LongTensor(hyps) + + return dec_seq + + def get_hypothesis(self, k): + """ Walk back to construct the full hypothesis.
""" + hyp = [] + for j in range(len(self.prev_ks) - 1, -1, -1): + hyp.append(self.next_ys[j+1][k]) + k = self.prev_ks[j][k] + + return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/yc2_univl/backup/pdvc/modules/bert-base-uncased/bert_config.json b/yc2_univl/backup/pdvc/modules/bert-base-uncased/bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fca794a5f07ff8f963fe8b61e3694b0fb7f955df --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/bert-base-uncased/bert_config.json @@ -0,0 +1,13 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 +} diff --git a/yc2_univl/backup/pdvc/modules/bert-base-uncased/vocab.txt b/yc2_univl/backup/pdvc/modules/bert-base-uncased/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb140275c155a9c7c5a3b3e0e77a9e839594a938 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/bert-base-uncased/vocab.txt @@ -0,0 +1,30522 @@ +[PAD] +[unused0] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[unused11] +[unused12] +[unused13] +[unused14] +[unused15] +[unused16] +[unused17] +[unused18] +[unused19] +[unused20] +[unused21] +[unused22] +[unused23] +[unused24] +[unused25] +[unused26] +[unused27] +[unused28] +[unused29] +[unused30] +[unused31] +[unused32] +[unused33] +[unused34] +[unused35] +[unused36] +[unused37] +[unused38] +[unused39] +[unused40] +[unused41] +[unused42] +[unused43] +[unused44] +[unused45] +[unused46] +[unused47] +[unused48] +[unused49] +[unused50] +[unused51] +[unused52] +[unused53] +[unused54] +[unused55] +[unused56] +[unused57] +[unused58] +[unused59] +[unused60] +[unused61] +[unused62] +[unused63] +[unused64] +[unused65] +[unused66] +[unused67] +[unused68] +[unused69] +[unused70] +[unused71] +[unused72] +[unused73] +[unused74] +[unused75] +[unused76] +[unused77] +[unused78] +[unused79] +[unused80] +[unused81] +[unused82] +[unused83] +[unused84] +[unused85] +[unused86] +[unused87] +[unused88] +[unused89] +[unused90] +[unused91] +[unused92] +[unused93] +[unused94] +[unused95] +[unused96] +[unused97] +[unused98] +[UNK] +[CLS] +[SEP] +[MASK] +[unused99] +[unused100] +[unused101] +[unused102] +[unused103] +[unused104] +[unused105] +[unused106] +[unused107] +[unused108] +[unused109] +[unused110] +[unused111] +[unused112] +[unused113] +[unused114] +[unused115] +[unused116] +[unused117] +[unused118] +[unused119] +[unused120] +[unused121] +[unused122] +[unused123] +[unused124] +[unused125] +[unused126] +[unused127] +[unused128] +[unused129] +[unused130] +[unused131] +[unused132] +[unused133] +[unused134] +[unused135] +[unused136] +[unused137] +[unused138] +[unused139] +[unused140] +[unused141] +[unused142] +[unused143] +[unused144] +[unused145] +[unused146] +[unused147] +[unused148] +[unused149] +[unused150] +[unused151] +[unused152] +[unused153] +[unused154] +[unused155] +[unused156] +[unused157] +[unused158] +[unused159] +[unused160] +[unused161] +[unused162] +[unused163] +[unused164] +[unused165] +[unused166] +[unused167] +[unused168] +[unused169] +[unused170] +[unused171] +[unused172] +[unused173] +[unused174] +[unused175] +[unused176] +[unused177] +[unused178] +[unused179] +[unused180] +[unused181] +[unused182] +[unused183] +[unused184] +[unused185] +[unused186] 
+[unused187] +[unused188] +[unused189] +[unused190] +[unused191] +[unused192] +[unused193] +[unused194] +[unused195] +[unused196] +[unused197] +[unused198] +[unused199] +[unused200] +[unused201] +[unused202] +[unused203] +[unused204] +[unused205] +[unused206] +[unused207] +[unused208] +[unused209] +[unused210] +[unused211] +[unused212] +[unused213] +[unused214] +[unused215] +[unused216] +[unused217] +[unused218] +[unused219] +[unused220] +[unused221] +[unused222] +[unused223] +[unused224] +[unused225] +[unused226] +[unused227] +[unused228] +[unused229] +[unused230] +[unused231] +[unused232] +[unused233] +[unused234] +[unused235] +[unused236] +[unused237] +[unused238] +[unused239] +[unused240] +[unused241] +[unused242] +[unused243] +[unused244] +[unused245] +[unused246] +[unused247] +[unused248] +[unused249] +[unused250] +[unused251] +[unused252] +[unused253] +[unused254] +[unused255] +[unused256] +[unused257] +[unused258] +[unused259] +[unused260] +[unused261] +[unused262] +[unused263] +[unused264] +[unused265] +[unused266] +[unused267] +[unused268] +[unused269] +[unused270] +[unused271] +[unused272] +[unused273] +[unused274] +[unused275] +[unused276] +[unused277] +[unused278] +[unused279] +[unused280] +[unused281] +[unused282] +[unused283] +[unused284] +[unused285] +[unused286] +[unused287] +[unused288] +[unused289] +[unused290] +[unused291] +[unused292] +[unused293] +[unused294] +[unused295] +[unused296] +[unused297] +[unused298] +[unused299] +[unused300] +[unused301] +[unused302] +[unused303] +[unused304] +[unused305] +[unused306] +[unused307] +[unused308] +[unused309] +[unused310] +[unused311] +[unused312] +[unused313] +[unused314] +[unused315] +[unused316] +[unused317] +[unused318] +[unused319] +[unused320] +[unused321] +[unused322] +[unused323] +[unused324] +[unused325] +[unused326] +[unused327] +[unused328] +[unused329] +[unused330] +[unused331] +[unused332] +[unused333] +[unused334] +[unused335] +[unused336] +[unused337] +[unused338] +[unused339] +[unused340] +[unused341] +[unused342] +[unused343] +[unused344] +[unused345] +[unused346] +[unused347] +[unused348] +[unused349] +[unused350] +[unused351] +[unused352] +[unused353] +[unused354] +[unused355] +[unused356] +[unused357] +[unused358] +[unused359] +[unused360] +[unused361] +[unused362] +[unused363] +[unused364] +[unused365] +[unused366] +[unused367] +[unused368] +[unused369] +[unused370] +[unused371] +[unused372] +[unused373] +[unused374] +[unused375] +[unused376] +[unused377] +[unused378] +[unused379] +[unused380] +[unused381] +[unused382] +[unused383] +[unused384] +[unused385] +[unused386] +[unused387] +[unused388] +[unused389] +[unused390] +[unused391] +[unused392] +[unused393] +[unused394] +[unused395] +[unused396] +[unused397] +[unused398] +[unused399] +[unused400] +[unused401] +[unused402] +[unused403] +[unused404] +[unused405] +[unused406] +[unused407] +[unused408] +[unused409] +[unused410] +[unused411] +[unused412] +[unused413] +[unused414] +[unused415] +[unused416] +[unused417] +[unused418] +[unused419] +[unused420] +[unused421] +[unused422] +[unused423] +[unused424] +[unused425] +[unused426] +[unused427] +[unused428] +[unused429] +[unused430] +[unused431] +[unused432] +[unused433] +[unused434] +[unused435] +[unused436] +[unused437] +[unused438] +[unused439] +[unused440] +[unused441] +[unused442] +[unused443] +[unused444] +[unused445] +[unused446] +[unused447] +[unused448] +[unused449] +[unused450] +[unused451] +[unused452] +[unused453] +[unused454] +[unused455] +[unused456] +[unused457] +[unused458] +[unused459] 
+[unused460] +[unused461] +[unused462] +[unused463] +[unused464] +[unused465] +[unused466] +[unused467] +[unused468] +[unused469] +[unused470] +[unused471] +[unused472] +[unused473] +[unused474] +[unused475] +[unused476] +[unused477] +[unused478] +[unused479] +[unused480] +[unused481] +[unused482] +[unused483] +[unused484] +[unused485] +[unused486] +[unused487] +[unused488] +[unused489] +[unused490] +[unused491] +[unused492] +[unused493] +[unused494] +[unused495] +[unused496] +[unused497] +[unused498] +[unused499] +[unused500] +[unused501] +[unused502] +[unused503] +[unused504] +[unused505] +[unused506] +[unused507] +[unused508] +[unused509] +[unused510] +[unused511] +[unused512] +[unused513] +[unused514] +[unused515] +[unused516] +[unused517] +[unused518] +[unused519] +[unused520] +[unused521] +[unused522] +[unused523] +[unused524] +[unused525] +[unused526] +[unused527] +[unused528] +[unused529] +[unused530] +[unused531] +[unused532] +[unused533] +[unused534] +[unused535] +[unused536] +[unused537] +[unused538] +[unused539] +[unused540] +[unused541] +[unused542] +[unused543] +[unused544] +[unused545] +[unused546] +[unused547] +[unused548] +[unused549] +[unused550] +[unused551] +[unused552] +[unused553] +[unused554] +[unused555] +[unused556] +[unused557] +[unused558] +[unused559] +[unused560] +[unused561] +[unused562] +[unused563] +[unused564] +[unused565] +[unused566] +[unused567] +[unused568] +[unused569] +[unused570] +[unused571] +[unused572] +[unused573] +[unused574] +[unused575] +[unused576] +[unused577] +[unused578] +[unused579] +[unused580] +[unused581] +[unused582] +[unused583] +[unused584] +[unused585] +[unused586] +[unused587] +[unused588] +[unused589] +[unused590] +[unused591] +[unused592] +[unused593] +[unused594] +[unused595] +[unused596] +[unused597] +[unused598] +[unused599] +[unused600] +[unused601] +[unused602] +[unused603] +[unused604] +[unused605] +[unused606] +[unused607] +[unused608] +[unused609] +[unused610] +[unused611] +[unused612] +[unused613] +[unused614] +[unused615] +[unused616] +[unused617] +[unused618] +[unused619] +[unused620] +[unused621] +[unused622] +[unused623] +[unused624] +[unused625] +[unused626] +[unused627] +[unused628] +[unused629] +[unused630] +[unused631] +[unused632] +[unused633] +[unused634] +[unused635] +[unused636] +[unused637] +[unused638] +[unused639] +[unused640] +[unused641] +[unused642] +[unused643] +[unused644] +[unused645] +[unused646] +[unused647] +[unused648] +[unused649] +[unused650] +[unused651] +[unused652] +[unused653] +[unused654] +[unused655] +[unused656] +[unused657] +[unused658] +[unused659] +[unused660] +[unused661] +[unused662] +[unused663] +[unused664] +[unused665] +[unused666] +[unused667] +[unused668] +[unused669] +[unused670] +[unused671] +[unused672] +[unused673] +[unused674] +[unused675] +[unused676] +[unused677] +[unused678] +[unused679] +[unused680] +[unused681] +[unused682] +[unused683] +[unused684] +[unused685] +[unused686] +[unused687] +[unused688] +[unused689] +[unused690] +[unused691] +[unused692] +[unused693] +[unused694] +[unused695] +[unused696] +[unused697] +[unused698] +[unused699] +[unused700] +[unused701] +[unused702] +[unused703] +[unused704] +[unused705] +[unused706] +[unused707] +[unused708] +[unused709] +[unused710] +[unused711] +[unused712] +[unused713] +[unused714] +[unused715] +[unused716] +[unused717] +[unused718] +[unused719] +[unused720] +[unused721] +[unused722] +[unused723] +[unused724] +[unused725] +[unused726] +[unused727] +[unused728] +[unused729] +[unused730] +[unused731] +[unused732] 
+[unused733] +[unused734] +[unused735] +[unused736] +[unused737] +[unused738] +[unused739] +[unused740] +[unused741] +[unused742] +[unused743] +[unused744] +[unused745] +[unused746] +[unused747] +[unused748] +[unused749] +[unused750] +[unused751] +[unused752] +[unused753] +[unused754] +[unused755] +[unused756] +[unused757] +[unused758] +[unused759] +[unused760] +[unused761] +[unused762] +[unused763] +[unused764] +[unused765] +[unused766] +[unused767] +[unused768] +[unused769] +[unused770] +[unused771] +[unused772] +[unused773] +[unused774] +[unused775] +[unused776] +[unused777] +[unused778] +[unused779] +[unused780] +[unused781] +[unused782] +[unused783] +[unused784] +[unused785] +[unused786] +[unused787] +[unused788] +[unused789] +[unused790] +[unused791] +[unused792] +[unused793] +[unused794] +[unused795] +[unused796] +[unused797] +[unused798] +[unused799] +[unused800] +[unused801] +[unused802] +[unused803] +[unused804] +[unused805] +[unused806] +[unused807] +[unused808] +[unused809] +[unused810] +[unused811] +[unused812] +[unused813] +[unused814] +[unused815] +[unused816] +[unused817] +[unused818] +[unused819] +[unused820] +[unused821] +[unused822] +[unused823] +[unused824] +[unused825] +[unused826] +[unused827] +[unused828] +[unused829] +[unused830] +[unused831] +[unused832] +[unused833] +[unused834] +[unused835] +[unused836] +[unused837] +[unused838] +[unused839] +[unused840] +[unused841] +[unused842] +[unused843] +[unused844] +[unused845] +[unused846] +[unused847] +[unused848] +[unused849] +[unused850] +[unused851] +[unused852] +[unused853] +[unused854] +[unused855] +[unused856] +[unused857] +[unused858] +[unused859] +[unused860] +[unused861] +[unused862] +[unused863] +[unused864] +[unused865] +[unused866] +[unused867] +[unused868] +[unused869] +[unused870] +[unused871] +[unused872] +[unused873] +[unused874] +[unused875] +[unused876] +[unused877] +[unused878] +[unused879] +[unused880] +[unused881] +[unused882] +[unused883] +[unused884] +[unused885] +[unused886] +[unused887] +[unused888] +[unused889] +[unused890] +[unused891] +[unused892] +[unused893] +[unused894] +[unused895] +[unused896] +[unused897] +[unused898] +[unused899] +[unused900] +[unused901] +[unused902] +[unused903] +[unused904] +[unused905] +[unused906] +[unused907] +[unused908] +[unused909] +[unused910] +[unused911] +[unused912] +[unused913] +[unused914] +[unused915] +[unused916] +[unused917] +[unused918] +[unused919] +[unused920] +[unused921] +[unused922] +[unused923] +[unused924] +[unused925] +[unused926] +[unused927] +[unused928] +[unused929] +[unused930] +[unused931] +[unused932] +[unused933] +[unused934] +[unused935] +[unused936] +[unused937] +[unused938] +[unused939] +[unused940] +[unused941] +[unused942] +[unused943] +[unused944] +[unused945] +[unused946] +[unused947] +[unused948] +[unused949] +[unused950] +[unused951] +[unused952] +[unused953] +[unused954] +[unused955] +[unused956] +[unused957] +[unused958] +[unused959] +[unused960] +[unused961] +[unused962] +[unused963] +[unused964] +[unused965] +[unused966] +[unused967] +[unused968] +[unused969] +[unused970] +[unused971] +[unused972] +[unused973] +[unused974] +[unused975] +[unused976] +[unused977] +[unused978] +[unused979] +[unused980] +[unused981] +[unused982] +[unused983] +[unused984] +[unused985] +[unused986] +[unused987] +[unused988] +[unused989] +[unused990] +[unused991] +[unused992] +[unused993] +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? 
+@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¡ +¢ +£ +¤ +¥ +¦ +§ +¨ +© +ª +« +¬ +® +° +± +² +³ +´ +µ +¶ +· +¹ +º +» +¼ +½ +¾ +¿ +× +ß +æ +ð +÷ +ø +þ +đ +ħ +ı +ł +ŋ +œ +ƒ +ɐ +ɑ +ɒ +ɔ +ɕ +ə +ɛ +ɡ +ɣ +ɨ +ɪ +ɫ +ɬ +ɯ +ɲ +ɴ +ɹ +ɾ +ʀ +ʁ +ʂ +ʃ +ʉ +ʊ +ʋ +ʌ +ʎ +ʐ +ʑ +ʒ +ʔ +ʰ +ʲ +ʳ +ʷ +ʸ +ʻ +ʼ +ʾ +ʿ +ˈ +ː +ˡ +ˢ +ˣ +ˤ +α +β +γ +δ +ε +ζ +η +θ +ι +κ +λ +μ +ν +ξ +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +ђ +є +і +ј +љ +њ +ћ +ӏ +ա +բ +գ +դ +ե +թ +ի +լ +կ +հ +մ +յ +ն +ո +պ +ս +վ +տ +ր +ւ +ք +־ +א +ב +ג +ד +ה +ו +ז +ח +ט +י +ך +כ +ל +ם +מ +ן +נ +ס +ע +ף +פ +ץ +צ +ק +ר +ש +ת +، +ء +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +ز +س +ش +ص +ض +ط +ظ +ع +غ +ـ +ف +ق +ك +ل +م +ن +ه +و +ى +ي +ٹ +پ +چ +ک +گ +ں +ھ +ہ +ی +ے +अ +आ +उ +ए +क +ख +ग +च +ज +ट +ड +ण +त +थ +द +ध +न +प +ब +भ +म +य +र +ल +व +श +ष +स +ह +ा +ि +ी +ो +। +॥ +ং +অ +আ +ই +উ +এ +ও +ক +খ +গ +চ +ছ +জ +ট +ড +ণ +ত +থ +দ +ধ +ন +প +ব +ভ +ম +য +র +ল +শ +ষ +স +হ +া +ি +ী +ে +க +ச +ட +த +ந +ன +ப +ம +ய +ர +ல +ள +வ +ா +ி +ு +ே +ை +ನ +ರ +ಾ +ක +ය +ර +ල +ව +ා +ก +ง +ต +ท +น +พ +ม +ย +ร +ล +ว +ส +อ +า +เ +་ +། +ག +ང +ད +ན +པ +བ +མ +འ +ར +ལ +ས +မ +ა +ბ +გ +დ +ე +ვ +თ +ი +კ +ლ +მ +ნ +ო +რ +ს +ტ +უ +ᄀ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄉ +ᄊ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅥ +ᅦ +ᅧ +ᅩ +ᅪ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆼ +ᴬ +ᴮ +ᴰ +ᴵ +ᴺ +ᵀ +ᵃ +ᵇ +ᵈ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵖ +ᵗ +ᵘ +ᵢ +ᵣ +ᵤ +ᵥ +ᶜ +ᶠ +‐ +‑ +‒ +– +— +― +‖ +‘ +’ +‚ +“ +” +„ +† +‡ +• +… +‰ +′ +″ +› +‿ +⁄ +⁰ +ⁱ +⁴ +⁵ +⁶ +⁷ +⁸ +⁹ +⁺ +⁻ +ⁿ +₀ +₁ +₂ +₃ +₄ +₅ +₆ +₇ +₈ +₉ +₊ +₍ +₎ +ₐ +ₑ +ₒ +ₓ +ₕ +ₖ +ₗ +ₘ +ₙ +ₚ +ₛ +ₜ +₤ +₩ +€ +₱ +₹ +ℓ +№ +ℝ +™ +⅓ +⅔ +← +↑ +→ +↓ +↔ +↦ +⇄ +⇌ +⇒ +∂ +∅ +∆ +∇ +∈ +− +∗ +∘ +√ +∞ +∧ +∨ +∩ +∪ +≈ +≡ +≤ +≥ +⊂ +⊆ +⊕ +⊗ +⋅ +─ +│ +■ +▪ +● +★ +☆ +☉ +♠ +♣ +♥ +♦ +♭ +♯ +⟨ +⟩ +ⱼ +⺩ +⺼ +⽥ +、 +。 +〈 +〉 +《 +》 +「 +」 +『 +』 +〜 +あ +い +う +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +や +ゆ +よ +ら +り +る +れ +ろ +を +ん +ァ +ア +ィ +イ +ウ +ェ +エ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ュ +ョ +ラ +リ +ル +レ +ロ +ワ +ン +・ +ー +一 +三 +上 +下 +不 +世 +中 +主 +久 +之 +也 +事 +二 +五 +井 +京 +人 +亻 +仁 +介 +代 +仮 +伊 +会 +佐 +侍 +保 +信 +健 +元 +光 +八 +公 +内 +出 +分 +前 +劉 +力 +加 +勝 +北 +区 +十 +千 +南 +博 +原 +口 +古 +史 +司 +合 +吉 +同 +名 +和 +囗 +四 +国 +國 +土 +地 +坂 +城 +堂 +場 +士 +夏 +外 +大 +天 +太 +夫 +奈 +女 +子 +学 +宀 +宇 +安 +宗 +定 +宣 +宮 +家 +宿 +寺 +將 +小 +尚 +山 +岡 +島 +崎 +川 +州 +巿 +帝 +平 +年 +幸 +广 +弘 +張 +彳 +後 +御 +德 +心 +忄 +志 +忠 +愛 +成 +我 +戦 +戸 +手 +扌 +政 +文 +新 +方 +日 +明 +星 +春 +昭 +智 +曲 +書 +月 +有 +朝 +木 +本 +李 +村 +東 +松 +林 +森 +楊 +樹 +橋 +歌 +止 +正 +武 +比 +氏 +民 +水 +氵 +氷 +永 +江 +沢 +河 +治 +法 +海 +清 +漢 +瀬 +火 +版 +犬 +王 +生 +田 +男 +疒 +発 +白 +的 +皇 +目 +相 +省 +真 +石 +示 +社 +神 +福 +禾 +秀 +秋 +空 +立 +章 +竹 +糹 +美 +義 +耳 +良 +艹 +花 +英 +華 +葉 +藤 +行 +街 +西 +見 +訁 +語 +谷 +貝 +貴 +車 +軍 +辶 +道 +郎 +郡 +部 +都 +里 +野 +金 +鈴 +镇 +長 +門 +間 +阝 +阿 +陳 +陽 +雄 +青 +面 +風 +食 +香 +馬 +高 +龍 +龸 +fi +fl +! +( +) +, +- +. +/ +: +? 
+~ +the +of +and +in +to +was +he +is +as +for +on +with +that +it +his +by +at +from +her +##s +she +you +had +an +were +but +be +this +are +not +my +they +one +which +or +have +him +me +first +all +also +their +has +up +who +out +been +when +after +there +into +new +two +its +##a +time +would +no +what +about +said +we +over +then +other +so +more +##e +can +if +like +back +them +only +some +could +##i +where +just +##ing +during +before +##n +do +##o +made +school +through +than +now +years +most +world +may +between +down +well +three +##d +year +while +will +##ed +##r +##y +later +##t +city +under +around +did +such +being +used +state +people +part +know +against +your +many +second +university +both +national +##er +these +don +known +off +way +until +re +how +even +get +head +... +didn +##ly +team +american +because +de +##l +born +united +film +since +still +long +work +south +us +became +any +high +again +day +family +see +right +man +eyes +house +season +war +states +including +took +life +north +same +each +called +name +much +place +however +go +four +group +another +found +won +area +here +going +10 +away +series +left +home +music +best +make +hand +number +company +several +never +last +john +000 +very +album +take +end +good +too +following +released +game +played +little +began +district +##m +old +want +those +side +held +own +early +county +ll +league +use +west +##u +face +think +##es +2010 +government +##h +march +came +small +general +town +june +##on +line +based +something +##k +september +thought +looked +along +international +2011 +air +july +club +went +january +october +our +august +april +york +12 +few +2012 +2008 +east +show +member +college +2009 +father +public +##us +come +men +five +set +station +church +##c +next +former +november +room +party +located +december +2013 +age +got +2007 +##g +system +let +love +2006 +though +every +2014 +look +song +water +century +without +body +black +night +within +great +women +single +ve +building +large +population +river +named +band +white +started +##an +once +15 +20 +should +18 +2015 +service +top +built +british +open +death +king +moved +local +times +children +february +book +why +11 +door +need +president +order +final +road +wasn +although +due +major +died +village +third +knew +2016 +asked +turned +st +wanted +say +##p +together +received +main +son +served +different +##en +behind +himself +felt +members +power +football +law +voice +play +##in +near +park +history +30 +having +2005 +16 +##man +saw +mother +##al +army +point +front +help +english +street +art +late +hands +games +award +##ia +young +14 +put +published +country +division +across +told +13 +often +ever +french +london +center +six +red +2017 +led +days +include +light +25 +find +tell +among +species +really +according +central +half +2004 +form +original +gave +office +making +enough +lost +full +opened +must +included +live +given +german +player +run +business +woman +community +cup +might +million +land +2000 +court +development +17 +short +round +ii +km +seen +class +story +always +become +sure +research +almost +director +council +la +##2 +career +things +using +island +##z +couldn +car +##is +24 +close +force +##1 +better +free +support +control +field +students +2003 +education +married +##b +nothing +worked +others +record +big +inside +level +anything +continued +give +james +##3 +military +established +non +returned +feel +does +title +written +thing +feet +william +far +co +association +hard +already +2002 +##ra +championship 
+human +western +100 +##na +department +hall +role +various +production +21 +19 +heart +2001 +living +fire +version +##ers +##f +television +royal +##4 +produced +working +act +case +society +region +present +radio +period +looking +least +total +keep +england +wife +program +per +brother +mind +special +22 +##le +am +works +soon +##6 +political +george +services +taken +created +##7 +further +able +reached +david +union +joined +upon +done +important +social +information +either +##ic +##x +appeared +position +ground +lead +rock +dark +election +23 +board +france +hair +course +arms +site +police +girl +instead +real +sound +##v +words +moment +##te +someone +##8 +summer +project +announced +san +less +wrote +past +followed +##5 +blue +founded +al +finally +india +taking +records +america +##ne +1999 +design +considered +northern +god +stop +battle +toward +european +outside +described +track +today +playing +language +28 +call +26 +heard +professional +low +australia +miles +california +win +yet +green +##ie +trying +blood +##ton +southern +science +maybe +everything +match +square +27 +mouth +video +race +recorded +leave +above +##9 +daughter +points +space +1998 +museum +change +middle +common +##0 +move +tv +post +##ta +lake +seven +tried +elected +closed +ten +paul +minister +##th +months +start +chief +return +canada +person +sea +release +similar +modern +brought +rest +hit +formed +mr +##la +1997 +floor +event +doing +thomas +1996 +robert +care +killed +training +star +week +needed +turn +finished +railway +rather +news +health +sent +example +ran +term +michael +coming +currently +yes +forces +despite +gold +areas +50 +stage +fact +29 +dead +says +popular +2018 +originally +germany +probably +developed +result +pulled +friend +stood +money +running +mi +signed +word +songs +child +eventually +met +tour +average +teams +minutes +festival +current +deep +kind +1995 +decided +usually +eastern +seemed +##ness +episode +bed +added +table +indian +private +charles +route +available +idea +throughout +centre +addition +appointed +style +1994 +books +eight +construction +press +mean +wall +friends +remained +schools +study +##ch +##um +institute +oh +chinese +sometimes +events +possible +1992 +australian +type +brown +forward +talk +process +food +debut +seat +performance +committee +features +character +arts +herself +else +lot +strong +russian +range +hours +peter +arm +##da +morning +dr +sold +##ry +quickly +directed +1993 +guitar +china +##w +31 +list +##ma +performed +media +uk +players +smile +##rs +myself +40 +placed +coach +province +towards +wouldn +leading +whole +boy +official +designed +grand +census +##el +europe +attack +japanese +henry +1991 +##re +##os +cross +getting +alone +action +lower +network +wide +washington +japan +1990 +hospital +believe +changed +sister +##ar +hold +gone +sir +hadn +ship +##ka +studies +academy +shot +rights +below +base +bad +involved +kept +largest +##ist +bank +future +especially +beginning +mark +movement +section +female +magazine +plan +professor +lord +longer +##ian +sat +walked +hill +actually +civil +energy +model +families +size +thus +aircraft +completed +includes +data +captain +##or +fight +vocals +featured +richard +bridge +fourth +1989 +officer +stone +hear +##ism +means +medical +groups +management +self +lips +competition +entire +lived +technology +leaving +federal +tournament +bit +passed +hot +independent +awards +kingdom +mary +spent +fine +doesn +reported +##ling +jack +fall +raised +itself +stay +true +studio +1988 
+sports +replaced +paris +systems +saint +leader +theatre +whose +market +capital +parents +spanish +canadian +earth +##ity +cut +degree +writing +bay +christian +awarded +natural +higher +bill +##as +coast +provided +previous +senior +ft +valley +organization +stopped +onto +countries +parts +conference +queen +security +interest +saying +allowed +master +earlier +phone +matter +smith +winning +try +happened +moving +campaign +los +##ley +breath +nearly +mid +1987 +certain +girls +date +italian +african +standing +fell +artist +##ted +shows +deal +mine +industry +1986 +##ng +everyone +republic +provide +collection +library +student +##ville +primary +owned +older +via +heavy +1st +makes +##able +attention +anyone +africa +##ri +stated +length +ended +fingers +command +staff +skin +foreign +opening +governor +okay +medal +kill +sun +cover +job +1985 +introduced +chest +hell +feeling +##ies +success +meet +reason +standard +meeting +novel +1984 +trade +source +buildings +##land +rose +guy +goal +##ur +chapter +native +husband +previously +unit +limited +entered +weeks +producer +operations +mountain +takes +covered +forced +related +roman +complete +successful +key +texas +cold +##ya +channel +1980 +traditional +films +dance +clear +approximately +500 +nine +van +prince +question +active +tracks +ireland +regional +silver +author +personal +sense +operation +##ine +economic +1983 +holding +twenty +isbn +additional +speed +hour +edition +regular +historic +places +whom +shook +movie +km² +secretary +prior +report +chicago +read +foundation +view +engine +scored +1982 +units +ask +airport +property +ready +immediately +lady +month +listed +contract +##de +manager +themselves +lines +##ki +navy +writer +meant +##ts +runs +##ro +practice +championships +singer +glass +commission +required +forest +starting +culture +generally +giving +access +attended +test +couple +stand +catholic +martin +caught +executive +##less +eye +##ey +thinking +chair +quite +shoulder +1979 +hope +decision +plays +defeated +municipality +whether +structure +offered +slowly +pain +ice +direction +##ion +paper +mission +1981 +mostly +200 +noted +individual +managed +nature +lives +plant +##ha +helped +except +studied +computer +figure +relationship +issue +significant +loss +die +smiled +gun +ago +highest +1972 +##am +male +bring +goals +mexico +problem +distance +commercial +completely +location +annual +famous +drive +1976 +neck +1978 +surface +caused +italy +understand +greek +highway +wrong +hotel +comes +appearance +joseph +double +issues +musical +companies +castle +income +review +assembly +bass +initially +parliament +artists +experience +1974 +particular +walk +foot +engineering +talking +window +dropped +##ter +miss +baby +boys +break +1975 +stars +edge +remember +policy +carried +train +stadium +bar +sex +angeles +evidence +##ge +becoming +assistant +soviet +1977 +upper +step +wing +1970 +youth +financial +reach +##ll +actor +numerous +##se +##st +nodded +arrived +##ation +minute +##nt +believed +sorry +complex +beautiful +victory +associated +temple +1968 +1973 +chance +perhaps +metal +##son +1945 +bishop +##et +lee +launched +particularly +tree +le +retired +subject +prize +contains +yeah +theory +empire +##ce +suddenly +waiting +trust +recording +##to +happy +terms +camp +champion +1971 +religious +pass +zealand +names +2nd +port +ancient +tom +corner +represented +watch +legal +anti +justice +cause +watched +brothers +45 +material +changes +simply +response +louis +fast +##ting +answer +60 +historical 
+1969 +stories +straight +create +feature +increased +rate +administration +virginia +el +activities +cultural +overall +winner +programs +basketball +legs +guard +beyond +cast +doctor +mm +flight +results +remains +cost +effect +winter +##ble +larger +islands +problems +chairman +grew +commander +isn +1967 +pay +failed +selected +hurt +fort +box +regiment +majority +journal +35 +edward +plans +##ke +##ni +shown +pretty +irish +characters +directly +scene +likely +operated +allow +spring +##j +junior +matches +looks +mike +houses +fellow +##tion +beach +marriage +##ham +##ive +rules +oil +65 +florida +expected +nearby +congress +sam +peace +recent +iii +wait +subsequently +cell +##do +variety +serving +agreed +please +poor +joe +pacific +attempt +wood +democratic +piece +prime +##ca +rural +mile +touch +appears +township +1964 +1966 +soldiers +##men +##ized +1965 +pennsylvania +closer +fighting +claimed +score +jones +physical +editor +##ous +filled +genus +specific +sitting +super +mom +##va +therefore +supported +status +fear +cases +store +meaning +wales +minor +spain +tower +focus +vice +frank +follow +parish +separate +golden +horse +fifth +remaining +branch +32 +presented +stared +##id +uses +secret +forms +##co +baseball +exactly +##ck +choice +note +discovered +travel +composed +truth +russia +ball +color +kiss +dad +wind +continue +ring +referred +numbers +digital +greater +##ns +metres +slightly +direct +increase +1960 +responsible +crew +rule +trees +troops +##no +broke +goes +individuals +hundred +weight +creek +sleep +memory +defense +provides +ordered +code +value +jewish +windows +1944 +safe +judge +whatever +corps +realized +growing +pre +##ga +cities +alexander +gaze +lies +spread +scott +letter +showed +situation +mayor +transport +watching +workers +extended +##li +expression +normal +##ment +chart +multiple +border +##ba +host +##ner +daily +mrs +walls +piano +##ko +heat +cannot +##ate +earned +products +drama +era +authority +seasons +join +grade +##io +sign +difficult +machine +1963 +territory +mainly +##wood +stations +squadron +1962 +stepped +iron +19th +##led +serve +appear +sky +speak +broken +charge +knowledge +kilometres +removed +ships +article +campus +simple +##ty +pushed +britain +##ve +leaves +recently +cd +soft +boston +latter +easy +acquired +poland +##sa +quality +officers +presence +planned +nations +mass +broadcast +jean +share +image +influence +wild +offer +emperor +electric +reading +headed +ability +promoted +yellow +ministry +1942 +throat +smaller +politician +##by +latin +spoke +cars +williams +males +lack +pop +80 +##ier +acting +seeing +consists +##ti +estate +1961 +pressure +johnson +newspaper +jr +chris +olympics +online +conditions +beat +elements +walking +vote +##field +needs +carolina +text +featuring +global +block +shirt +levels +francisco +purpose +females +et +dutch +duke +ahead +gas +twice +safety +serious +turning +highly +lieutenant +firm +maria +amount +mixed +daniel +proposed +perfect +agreement +affairs +3rd +seconds +contemporary +paid +1943 +prison +save +kitchen +label +administrative +intended +constructed +academic +nice +teacher +races +1956 +formerly +corporation +ben +nation +issued +shut +1958 +drums +housing +victoria +seems +opera +1959 +graduated +function +von +mentioned +picked +build +recognized +shortly +protection +picture +notable +exchange +elections +1980s +loved +percent +racing +fish +elizabeth +garden +volume +hockey +1941 +beside +settled +##ford +1940 +competed +replied +drew +1948 +actress +marine 
+scotland +steel +glanced +farm +steve +1957 +risk +tonight +positive +magic +singles +effects +gray +screen +dog +##ja +residents +bus +sides +none +secondary +literature +polish +destroyed +flying +founder +households +1939 +lay +reserve +usa +gallery +##ler +1946 +industrial +younger +approach +appearances +urban +ones +1950 +finish +avenue +powerful +fully +growth +page +honor +jersey +projects +advanced +revealed +basic +90 +infantry +pair +equipment +visit +33 +evening +search +grant +effort +solo +treatment +buried +republican +primarily +bottom +owner +1970s +israel +gives +jim +dream +bob +remain +spot +70 +notes +produce +champions +contact +ed +soul +accepted +ways +del +##ally +losing +split +price +capacity +basis +trial +questions +##ina +1955 +20th +guess +officially +memorial +naval +initial +##ization +whispered +median +engineer +##ful +sydney +##go +columbia +strength +300 +1952 +tears +senate +00 +card +asian +agent +1947 +software +44 +draw +warm +supposed +com +pro +##il +transferred +leaned +##at +candidate +escape +mountains +asia +potential +activity +entertainment +seem +traffic +jackson +murder +36 +slow +product +orchestra +haven +agency +bbc +taught +website +comedy +unable +storm +planning +albums +rugby +environment +scientific +grabbed +protect +##hi +boat +typically +1954 +1953 +damage +principal +divided +dedicated +mount +ohio +##berg +pick +fought +driver +##der +empty +shoulders +sort +thank +berlin +prominent +account +freedom +necessary +efforts +alex +headquarters +follows +alongside +des +simon +andrew +suggested +operating +learning +steps +1949 +sweet +technical +begin +easily +34 +teeth +speaking +settlement +scale +##sh +renamed +ray +max +enemy +semi +joint +compared +##rd +scottish +leadership +analysis +offers +georgia +pieces +captured +animal +deputy +guest +organized +##lin +tony +combined +method +challenge +1960s +huge +wants +battalion +sons +rise +crime +types +facilities +telling +path +1951 +platform +sit +1990s +##lo +tells +assigned +rich +pull +##ot +commonly +alive +##za +letters +concept +conducted +wearing +happen +bought +becomes +holy +gets +ocean +defeat +languages +purchased +coffee +occurred +titled +##q +declared +applied +sciences +concert +sounds +jazz +brain +##me +painting +fleet +tax +nick +##ius +michigan +count +animals +leaders +episodes +##line +content +##den +birth +##it +clubs +64 +palace +critical +refused +fair +leg +laughed +returning +surrounding +participated +formation +lifted +pointed +connected +rome +medicine +laid +taylor +santa +powers +adam +tall +shared +focused +knowing +yards +entrance +falls +##wa +calling +##ad +sources +chosen +beneath +resources +yard +##ite +nominated +silence +zone +defined +##que +gained +thirty +38 +bodies +moon +##ard +adopted +christmas +widely +register +apart +iran +premier +serves +du +unknown +parties +##les +generation +##ff +continues +quick +fields +brigade +quiet +teaching +clothes +impact +weapons +partner +flat +theater +supreme +1938 +37 +relations +##tor +plants +suffered +1936 +wilson +kids +begins +##age +1918 +seats +armed +internet +models +worth +laws +400 +communities +classes +background +knows +thanks +quarter +reaching +humans +carry +killing +format +kong +hong +setting +75 +architecture +disease +railroad +inc +possibly +wish +arthur +thoughts +harry +doors +density +##di +crowd +illinois +stomach +tone +unique +reports +anyway +##ir +liberal +der +vehicle +thick +dry +drug +faced +largely +facility +theme +holds +creation +strange +colonel +##mi 
+revolution +bell +politics +turns +silent +rail +relief +independence +combat +shape +write +determined +sales +learned +4th +finger +oxford +providing +1937 +heritage +fiction +situated +designated +allowing +distribution +hosted +##est +sight +interview +estimated +reduced +##ria +toronto +footballer +keeping +guys +damn +claim +motion +sport +sixth +stayed +##ze +en +rear +receive +handed +twelve +dress +audience +granted +brazil +##well +spirit +##ated +noticed +etc +olympic +representative +eric +tight +trouble +reviews +drink +vampire +missing +roles +ranked +newly +household +finals +wave +critics +##ee +phase +massachusetts +pilot +unlike +philadelphia +bright +guns +crown +organizations +roof +42 +respectively +clearly +tongue +marked +circle +fox +korea +bronze +brian +expanded +sexual +supply +yourself +inspired +labour +fc +##ah +reference +vision +draft +connection +brand +reasons +1935 +classic +driving +trip +jesus +cells +entry +1920 +neither +trail +claims +atlantic +orders +labor +nose +afraid +identified +intelligence +calls +cancer +attacked +passing +stephen +positions +imperial +grey +jason +39 +sunday +48 +swedish +avoid +extra +uncle +message +covers +allows +surprise +materials +fame +hunter +##ji +1930 +citizens +figures +davis +environmental +confirmed +shit +titles +di +performing +difference +acts +attacks +##ov +existing +votes +opportunity +nor +shop +entirely +trains +opposite +pakistan +##pa +develop +resulted +representatives +actions +reality +pressed +##ish +barely +wine +conversation +faculty +northwest +ends +documentary +nuclear +stock +grace +sets +eat +alternative +##ps +bag +resulting +creating +surprised +cemetery +1919 +drop +finding +sarah +cricket +streets +tradition +ride +1933 +exhibition +target +ear +explained +rain +composer +injury +apartment +municipal +educational +occupied +netherlands +clean +billion +constitution +learn +1914 +maximum +classical +francis +lose +opposition +jose +ontario +bear +core +hills +rolled +ending +drawn +permanent +fun +##tes +##lla +lewis +sites +chamber +ryan +##way +scoring +height +1934 +##house +lyrics +staring +55 +officials +1917 +snow +oldest +##tic +orange +##ger +qualified +interior +apparently +succeeded +thousand +dinner +lights +existence +fans +heavily +41 +greatest +conservative +send +bowl +plus +enter +catch +##un +economy +duty +1929 +speech +authorities +princess +performances +versions +shall +graduate +pictures +effective +remembered +poetry +desk +crossed +starring +starts +passenger +sharp +##ant +acres +ass +weather +falling +rank +fund +supporting +check +adult +publishing +heads +cm +southeast +lane +##burg +application +bc +##ura +les +condition +transfer +prevent +display +ex +regions +earl +federation +cool +relatively +answered +besides +1928 +obtained +portion +##town +mix +##ding +reaction +liked +dean +express +peak +1932 +##tte +counter +religion +chain +rare +miller +convention +aid +lie +vehicles +mobile +perform +squad +wonder +lying +crazy +sword +##ping +attempted +centuries +weren +philosophy +category +##ize +anna +interested +47 +sweden +wolf +frequently +abandoned +kg +literary +alliance +task +entitled +##ay +threw +promotion +factory +tiny +soccer +visited +matt +fm +achieved +52 +defence +internal +persian +43 +methods +##ging +arrested +otherwise +cambridge +programming +villages +elementary +districts +rooms +criminal +conflict +worry +trained +1931 +attempts +waited +signal +bird +truck +subsequent +programme +##ol +ad +49 +communist +details +faith +sector 
+patrick +carrying +laugh +##ss +controlled +korean +showing +origin +fuel +evil +1927 +##ent +brief +identity +darkness +address +pool +missed +publication +web +planet +ian +anne +wings +invited +##tt +briefly +standards +kissed +##be +ideas +climate +causing +walter +worse +albert +articles +winners +desire +aged +northeast +dangerous +gate +doubt +1922 +wooden +multi +##ky +poet +rising +funding +46 +communications +communication +violence +copies +prepared +ford +investigation +skills +1924 +pulling +electronic +##ak +##ial +##han +containing +ultimately +offices +singing +understanding +restaurant +tomorrow +fashion +christ +ward +da +pope +stands +5th +flow +studios +aired +commissioned +contained +exist +fresh +americans +##per +wrestling +approved +kid +employed +respect +suit +1925 +angel +asking +increasing +frame +angry +selling +1950s +thin +finds +##nd +temperature +statement +ali +explain +inhabitants +towns +extensive +narrow +51 +jane +flowers +images +promise +somewhere +object +fly +closely +##ls +1912 +bureau +cape +1926 +weekly +presidential +legislative +1921 +##ai +##au +launch +founding +##ny +978 +##ring +artillery +strike +un +institutions +roll +writers +landing +chose +kevin +anymore +pp +##ut +attorney +fit +dan +billboard +receiving +agricultural +breaking +sought +dave +admitted +lands +mexican +##bury +charlie +specifically +hole +iv +howard +credit +moscow +roads +accident +1923 +proved +wear +struck +hey +guards +stuff +slid +expansion +1915 +cat +anthony +##kin +melbourne +opposed +sub +southwest +architect +failure +plane +1916 +##ron +map +camera +tank +listen +regarding +wet +introduction +metropolitan +link +ep +fighter +inch +grown +gene +anger +fixed +buy +dvd +khan +domestic +worldwide +chapel +mill +functions +examples +##head +developing +1910 +turkey +hits +pocket +antonio +papers +grow +unless +circuit +18th +concerned +attached +journalist +selection +journey +converted +provincial +painted +hearing +aren +bands +negative +aside +wondered +knight +lap +survey +ma +##ow +noise +billy +##ium +shooting +guide +bedroom +priest +resistance +motor +homes +sounded +giant +##mer +150 +scenes +equal +comic +patients +hidden +solid +actual +bringing +afternoon +touched +funds +wedding +consisted +marie +canal +sr +kim +treaty +turkish +recognition +residence +cathedral +broad +knees +incident +shaped +fired +norwegian +handle +cheek +contest +represent +##pe +representing +beauty +##sen +birds +advantage +emergency +wrapped +drawing +notice +pink +broadcasting +##ong +somehow +bachelor +seventh +collected +registered +establishment +alan +assumed +chemical +personnel +roger +retirement +jeff +portuguese +wore +tied +device +threat +progress +advance +##ised +banks +hired +manchester +nfl +teachers +structures +forever +##bo +tennis +helping +saturday +sale +applications +junction +hip +incorporated +neighborhood +dressed +ceremony +##ds +influenced +hers +visual +stairs +decades +inner +kansas +hung +hoped +gain +scheduled +downtown +engaged +austria +clock +norway +certainly +pale +protected +1913 +victor +employees +plate +putting +surrounded +##ists +finishing +blues +tropical +##ries +minnesota +consider +philippines +accept +54 +retrieved +1900 +concern +anderson +properties +institution +gordon +successfully +vietnam +##dy +backing +outstanding +muslim +crossing +folk +producing +usual +demand +occurs +observed +lawyer +educated +##ana +kelly +string +pleasure +budget +items +quietly +colorado +philip +typical +##worth +derived +600 +survived 
+asks +mental +##ide +56 +jake +jews +distinguished +ltd +1911 +sri +extremely +53 +athletic +loud +thousands +worried +shadow +transportation +horses +weapon +arena +importance +users +tim +objects +contributed +dragon +douglas +aware +senator +johnny +jordan +sisters +engines +flag +investment +samuel +shock +capable +clark +row +wheel +refers +session +familiar +biggest +wins +hate +maintained +drove +hamilton +request +expressed +injured +underground +churches +walker +wars +tunnel +passes +stupid +agriculture +softly +cabinet +regarded +joining +indiana +##ea +##ms +push +dates +spend +behavior +woods +protein +gently +chase +morgan +mention +burning +wake +combination +occur +mirror +leads +jimmy +indeed +impossible +singapore +paintings +covering +##nes +soldier +locations +attendance +sell +historian +wisconsin +invasion +argued +painter +diego +changing +egypt +##don +experienced +inches +##ku +missouri +vol +grounds +spoken +switzerland +##gan +reform +rolling +ha +forget +massive +resigned +burned +allen +tennessee +locked +values +improved +##mo +wounded +universe +sick +dating +facing +pack +purchase +user +##pur +moments +##ul +merged +anniversary +1908 +coal +brick +understood +causes +dynasty +queensland +establish +stores +crisis +promote +hoping +views +cards +referee +extension +##si +raise +arizona +improve +colonial +formal +charged +##rt +palm +lucky +hide +rescue +faces +95 +feelings +candidates +juan +##ell +goods +6th +courses +weekend +59 +luke +cash +fallen +##om +delivered +affected +installed +carefully +tries +swiss +hollywood +costs +lincoln +responsibility +##he +shore +file +proper +normally +maryland +assistance +jump +constant +offering +friendly +waters +persons +realize +contain +trophy +800 +partnership +factor +58 +musicians +cry +bound +oregon +indicated +hero +houston +medium +##ure +consisting +somewhat +##ara +57 +cycle +##che +beer +moore +frederick +gotten +eleven +worst +weak +approached +arranged +chin +loan +universal +bond +fifteen +pattern +disappeared +##ney +translated +##zed +lip +arab +capture +interests +insurance +##chi +shifted +cave +prix +warning +sections +courts +coat +plot +smell +feed +golf +favorite +maintain +knife +vs +voted +degrees +finance +quebec +opinion +translation +manner +ruled +operate +productions +choose +musician +discovery +confused +tired +separated +stream +techniques +committed +attend +ranking +kings +throw +passengers +measure +horror +fan +mining +sand +danger +salt +calm +decade +dam +require +runner +##ik +rush +associate +greece +##ker +rivers +consecutive +matthew +##ski +sighed +sq +documents +steam +edited +closing +tie +accused +1905 +##ini +islamic +distributed +directors +organisation +bruce +7th +breathing +mad +lit +arrival +concrete +taste +08 +composition +shaking +faster +amateur +adjacent +stating +1906 +twin +flew +##ran +tokyo +publications +##tone +obviously +ridge +storage +1907 +carl +pages +concluded +desert +driven +universities +ages +terminal +sequence +borough +250 +constituency +creative +cousin +economics +dreams +margaret +notably +reduce +montreal +mode +17th +ears +saved +jan +vocal +##ica +1909 +andy +##jo +riding +roughly +threatened +##ise +meters +meanwhile +landed +compete +repeated +grass +czech +regularly +charges +tea +sudden +appeal +##ung +solution +describes +pierre +classification +glad +parking +##ning +belt +physics +99 +rachel +add +hungarian +participate +expedition +damaged +gift +childhood +85 +fifty +##red +mathematics +jumped +letting +defensive +mph +##ux 
+##gh +testing +##hip +hundreds +shoot +owners +matters +smoke +israeli +kentucky +dancing +mounted +grandfather +emma +designs +profit +argentina +##gs +truly +li +lawrence +cole +begun +detroit +willing +branches +smiling +decide +miami +enjoyed +recordings +##dale +poverty +ethnic +gay +##bi +gary +arabic +09 +accompanied +##one +##ons +fishing +determine +residential +acid +##ary +alice +returns +starred +mail +##ang +jonathan +strategy +##ue +net +forty +cook +businesses +equivalent +commonwealth +distinct +ill +##cy +seriously +##ors +##ped +shift +harris +replace +rio +imagine +formula +ensure +##ber +additionally +scheme +conservation +occasionally +purposes +feels +favor +##and +##ore +1930s +contrast +hanging +hunt +movies +1904 +instruments +victims +danish +christopher +busy +demon +sugar +earliest +colony +studying +balance +duties +##ks +belgium +slipped +carter +05 +visible +stages +iraq +fifa +##im +commune +forming +zero +07 +continuing +talked +counties +legend +bathroom +option +tail +clay +daughters +afterwards +severe +jaw +visitors +##ded +devices +aviation +russell +kate +##vi +entering +subjects +##ino +temporary +swimming +forth +smooth +ghost +audio +bush +operates +rocks +movements +signs +eddie +##tz +ann +voices +honorary +06 +memories +dallas +pure +measures +racial +promised +66 +harvard +ceo +16th +parliamentary +indicate +benefit +flesh +dublin +louisiana +1902 +1901 +patient +sleeping +1903 +membership +coastal +medieval +wanting +element +scholars +rice +62 +limit +survive +makeup +rating +definitely +collaboration +obvious +##tan +boss +ms +baron +birthday +linked +soil +diocese +##lan +ncaa +##mann +offensive +shell +shouldn +waist +##tus +plain +ross +organ +resolution +manufacturing +adding +relative +kennedy +98 +whilst +moth +marketing +gardens +crash +72 +heading +partners +credited +carlos +moves +cable +##zi +marshall +##out +depending +bottle +represents +rejected +responded +existed +04 +jobs +denmark +lock +##ating +treated +graham +routes +talent +commissioner +drugs +secure +tests +reign +restored +photography +##gi +contributions +oklahoma +designer +disc +grin +seattle +robin +paused +atlanta +unusual +##gate +praised +las +laughing +satellite +hungary +visiting +##sky +interesting +factors +deck +poems +norman +##water +stuck +speaker +rifle +domain +premiered +##her +dc +comics +actors +01 +reputation +eliminated +8th +ceiling +prisoners +script +##nce +leather +austin +mississippi +rapidly +admiral +parallel +charlotte +guilty +tools +gender +divisions +fruit +##bs +laboratory +nelson +fantasy +marry +rapid +aunt +tribe +requirements +aspects +suicide +amongst +adams +bone +ukraine +abc +kick +sees +edinburgh +clothing +column +rough +gods +hunting +broadway +gathered +concerns +##ek +spending +ty +12th +snapped +requires +solar +bones +cavalry +##tta +iowa +drinking +waste +index +franklin +charity +thompson +stewart +tip +flash +landscape +friday +enjoy +singh +poem +listening +##back +eighth +fred +differences +adapted +bomb +ukrainian +surgery +corporate +masters +anywhere +##more +waves +odd +sean +portugal +orleans +dick +debate +kent +eating +puerto +cleared +96 +expect +cinema +97 +guitarist +blocks +electrical +agree +involving +depth +dying +panel +struggle +##ged +peninsula +adults +novels +emerged +vienna +metro +debuted +shoes +tamil +songwriter +meets +prove +beating +instance +heaven +scared +sending +marks +artistic +passage +superior +03 +significantly +shopping +##tive +retained +##izing +malaysia +technique +cheeks 
+##ola +warren +maintenance +destroy +extreme +allied +120 +appearing +##yn +fill +advice +alabama +qualifying +policies +cleveland +hat +battery +smart +authors +10th +soundtrack +acted +dated +lb +glance +equipped +coalition +funny +outer +ambassador +roy +possibility +couples +campbell +dna +loose +ethan +supplies +1898 +gonna +88 +monster +##res +shake +agents +frequency +springs +dogs +practices +61 +gang +plastic +easier +suggests +gulf +blade +exposed +colors +industries +markets +pan +nervous +electoral +charts +legislation +ownership +##idae +mac +appointment +shield +copy +assault +socialist +abbey +monument +license +throne +employment +jay +93 +replacement +charter +cloud +powered +suffering +accounts +oak +connecticut +strongly +wright +colour +crystal +13th +context +welsh +networks +voiced +gabriel +jerry +##cing +forehead +mp +##ens +manage +schedule +totally +remix +##ii +forests +occupation +print +nicholas +brazilian +strategic +vampires +engineers +76 +roots +seek +correct +instrumental +und +alfred +backed +hop +##des +stanley +robinson +traveled +wayne +welcome +austrian +achieve +67 +exit +rates +1899 +strip +whereas +##cs +sing +deeply +adventure +bobby +rick +jamie +careful +components +cap +useful +personality +knee +##shi +pushing +hosts +02 +protest +ca +ottoman +symphony +##sis +63 +boundary +1890 +processes +considering +considerable +tons +##work +##ft +##nia +cooper +trading +dear +conduct +91 +illegal +apple +revolutionary +holiday +definition +harder +##van +jacob +circumstances +destruction +##lle +popularity +grip +classified +liverpool +donald +baltimore +flows +seeking +honour +approval +92 +mechanical +till +happening +statue +critic +increasingly +immediate +describe +commerce +stare +##ster +indonesia +meat +rounds +boats +baker +orthodox +depression +formally +worn +naked +claire +muttered +sentence +11th +emily +document +77 +criticism +wished +vessel +spiritual +bent +virgin +parker +minimum +murray +lunch +danny +printed +compilation +keyboards +false +blow +belonged +68 +raising +78 +cutting +##board +pittsburgh +##up +9th +shadows +81 +hated +indigenous +jon +15th +barry +scholar +ah +##zer +oliver +##gy +stick +susan +meetings +attracted +spell +romantic +##ver +ye +1895 +photo +demanded +customers +##ac +1896 +logan +revival +keys +modified +commanded +jeans +##ious +upset +raw +phil +detective +hiding +resident +vincent +##bly +experiences +diamond +defeating +coverage +lucas +external +parks +franchise +helen +bible +successor +percussion +celebrated +il +lift +profile +clan +romania +##ied +mills +##su +nobody +achievement +shrugged +fault +1897 +rhythm +initiative +breakfast +carbon +700 +69 +lasted +violent +74 +wound +ken +killer +gradually +filmed +°c +dollars +processing +94 +remove +criticized +guests +sang +chemistry +##vin +legislature +disney +##bridge +uniform +escaped +integrated +proposal +purple +denied +liquid +karl +influential +morris +nights +stones +intense +experimental +twisted +71 +84 +##ld +pace +nazi +mitchell +ny +blind +reporter +newspapers +14th +centers +burn +basin +forgotten +surviving +filed +collections +monastery +losses +manual +couch +description +appropriate +merely +tag +missions +sebastian +restoration +replacing +triple +73 +elder +julia +warriors +benjamin +julian +convinced +stronger +amazing +declined +versus +merchant +happens +output +finland +bare +barbara +absence +ignored +dawn +injuries +##port +producers +##ram +82 +luis +##ities +kw +admit +expensive +electricity +nba +exception +symbol 
+##ving +ladies +shower +sheriff +characteristics +##je +aimed +button +ratio +effectively +summit +angle +jury +bears +foster +vessels +pants +executed +evans +dozen +advertising +kicked +patrol +1889 +competitions +lifetime +principles +athletics +##logy +birmingham +sponsored +89 +rob +nomination +1893 +acoustic +##sm +creature +longest +##tra +credits +harbor +dust +josh +##so +territories +milk +infrastructure +completion +thailand +indians +leon +archbishop +##sy +assist +pitch +blake +arrangement +girlfriend +serbian +operational +hence +sad +scent +fur +dj +sessions +hp +refer +rarely +##ora +exists +1892 +##ten +scientists +dirty +penalty +burst +portrait +seed +79 +pole +limits +rival +1894 +stable +alpha +grave +constitutional +alcohol +arrest +flower +mystery +devil +architectural +relationships +greatly +habitat +##istic +larry +progressive +remote +cotton +##ics +##ok +preserved +reaches +##ming +cited +86 +vast +scholarship +decisions +cbs +joy +teach +1885 +editions +knocked +eve +searching +partly +participation +gap +animated +fate +excellent +##ett +na +87 +alternate +saints +youngest +##ily +climbed +##ita +##tors +suggest +##ct +discussion +staying +choir +lakes +jacket +revenue +nevertheless +peaked +instrument +wondering +annually +managing +neil +1891 +signing +terry +##ice +apply +clinical +brooklyn +aim +catherine +fuck +farmers +figured +ninth +pride +hugh +evolution +ordinary +involvement +comfortable +shouted +tech +encouraged +taiwan +representation +sharing +##lia +##em +panic +exact +cargo +competing +fat +cried +83 +1920s +occasions +pa +cabin +borders +utah +marcus +##isation +badly +muscles +##ance +victorian +transition +warner +bet +permission +##rin +slave +terrible +similarly +shares +seth +uefa +possession +medals +benefits +colleges +lowered +perfectly +mall +transit +##ye +##kar +publisher +##ened +harrison +deaths +elevation +##ae +asleep +machines +sigh +ash +hardly +argument +occasion +parent +leo +decline +1888 +contribution +##ua +concentration +1000 +opportunities +hispanic +guardian +extent +emotions +hips +mason +volumes +bloody +controversy +diameter +steady +mistake +phoenix +identify +violin +##sk +departure +richmond +spin +funeral +enemies +1864 +gear +literally +connor +random +sergeant +grab +confusion +1865 +transmission +informed +op +leaning +sacred +suspended +thinks +gates +portland +luck +agencies +yours +hull +expert +muscle +layer +practical +sculpture +jerusalem +latest +lloyd +statistics +deeper +recommended +warrior +arkansas +mess +supports +greg +eagle +1880 +recovered +rated +concerts +rushed +##ano +stops +eggs +files +premiere +keith +##vo +delhi +turner +pit +affair +belief +paint +##zing +mate +##ach +##ev +victim +##ology +withdrew +bonus +styles +fled +##ud +glasgow +technologies +funded +nbc +adaptation +##ata +portrayed +cooperation +supporters +judges +bernard +justin +hallway +ralph +##ick +graduating +controversial +distant +continental +spider +bite +##ho +recognize +intention +mixing +##ese +egyptian +bow +tourism +suppose +claiming +tiger +dominated +participants +vi +##ru +nurse +partially +tape +##rum +psychology +##rn +essential +touring +duo +voting +civilian +emotional +channels +##king +apparent +hebrew +1887 +tommy +carrier +intersection +beast +hudson +##gar +##zo +lab +nova +bench +discuss +costa +##ered +detailed +behalf +drivers +unfortunately +obtain +##lis +rocky +##dae +siege +friendship +honey +##rian +1861 +amy +hang +posted +governments +collins +respond +wildlife +preferred +operator 
+##po +laura +pregnant +videos +dennis +suspected +boots +instantly +weird +automatic +businessman +alleged +placing +throwing +ph +mood +1862 +perry +venue +jet +remainder +##lli +##ci +passion +biological +boyfriend +1863 +dirt +buffalo +ron +segment +fa +abuse +##era +genre +thrown +stroke +colored +stress +exercise +displayed +##gen +struggled +##tti +abroad +dramatic +wonderful +thereafter +madrid +component +widespread +##sed +tale +citizen +todd +monday +1886 +vancouver +overseas +forcing +crying +descent +##ris +discussed +substantial +ranks +regime +1870 +provinces +switch +drum +zane +ted +tribes +proof +lp +cream +researchers +volunteer +manor +silk +milan +donated +allies +venture +principle +delivery +enterprise +##ves +##ans +bars +traditionally +witch +reminded +copper +##uk +pete +inter +links +colin +grinned +elsewhere +competitive +frequent +##oy +scream +##hu +tension +texts +submarine +finnish +defending +defend +pat +detail +1884 +affiliated +stuart +themes +villa +periods +tool +belgian +ruling +crimes +answers +folded +licensed +resort +demolished +hans +lucy +1881 +lion +traded +photographs +writes +craig +##fa +trials +generated +beth +noble +debt +percentage +yorkshire +erected +ss +viewed +grades +confidence +ceased +islam +telephone +retail +##ible +chile +m² +roberts +sixteen +##ich +commented +hampshire +innocent +dual +pounds +checked +regulations +afghanistan +sung +rico +liberty +assets +bigger +options +angels +relegated +tribute +wells +attending +leaf +##yan +butler +romanian +forum +monthly +lisa +patterns +gmina +##tory +madison +hurricane +rev +##ians +bristol +##ula +elite +valuable +disaster +democracy +awareness +germans +freyja +##ins +loop +absolutely +paying +populations +maine +sole +prayer +spencer +releases +doorway +bull +##ani +lover +midnight +conclusion +##sson +thirteen +lily +mediterranean +##lt +nhl +proud +sample +##hill +drummer +guinea +##ova +murphy +climb +##ston +instant +attributed +horn +ain +railways +steven +##ao +autumn +ferry +opponent +root +traveling +secured +corridor +stretched +tales +sheet +trinity +cattle +helps +indicates +manhattan +murdered +fitted +1882 +gentle +grandmother +mines +shocked +vegas +produces +##light +caribbean +##ou +belong +continuous +desperate +drunk +historically +trio +waved +raf +dealing +nathan +bat +murmured +interrupted +residing +scientist +pioneer +harold +aaron +##net +delta +attempting +minority +mini +believes +chorus +tend +lots +eyed +indoor +load +shots +updated +jail +##llo +concerning +connecting +wealth +##ved +slaves +arrive +rangers +sufficient +rebuilt +##wick +cardinal +flood +muhammad +whenever +relation +runners +moral +repair +viewers +arriving +revenge +punk +assisted +bath +fairly +breathe +lists +innings +illustrated +whisper +nearest +voters +clinton +ties +ultimate +screamed +beijing +lions +andre +fictional +gathering +comfort +radar +suitable +dismissed +hms +ban +pine +wrist +atmosphere +voivodeship +bid +timber +##ned +##nan +giants +##ane +cameron +recovery +uss +identical +categories +switched +serbia +laughter +noah +ensemble +therapy +peoples +touching +##off +locally +pearl +platforms +everywhere +ballet +tables +lanka +herbert +outdoor +toured +derek +1883 +spaces +contested +swept +1878 +exclusive +slight +connections +##dra +winds +prisoner +collective +bangladesh +tube +publicly +wealthy +thai +##ys +isolated +select +##ric +insisted +pen +fortune +ticket +spotted +reportedly +animation +enforcement +tanks +110 +decides +wider +lowest +owen +##time +nod 
+hitting +##hn +gregory +furthermore +magazines +fighters +solutions +##ery +pointing +requested +peru +reed +chancellor +knights +mask +worker +eldest +flames +reduction +1860 +volunteers +##tis +reporting +##hl +wire +advisory +endemic +origins +settlers +pursue +knock +consumer +1876 +eu +compound +creatures +mansion +sentenced +ivan +deployed +guitars +frowned +involves +mechanism +kilometers +perspective +shops +maps +terminus +duncan +alien +fist +bridges +##pers +heroes +fed +derby +swallowed +##ros +patent +sara +illness +characterized +adventures +slide +hawaii +jurisdiction +##op +organised +##side +adelaide +walks +biology +se +##ties +rogers +swing +tightly +boundaries +##rie +prepare +implementation +stolen +##sha +certified +colombia +edwards +garage +##mm +recalled +##ball +rage +harm +nigeria +breast +##ren +furniture +pupils +settle +##lus +cuba +balls +client +alaska +21st +linear +thrust +celebration +latino +genetic +terror +##cia +##ening +lightning +fee +witness +lodge +establishing +skull +##ique +earning +hood +##ei +rebellion +wang +sporting +warned +missile +devoted +activist +porch +worship +fourteen +package +1871 +decorated +##shire +housed +##ock +chess +sailed +doctors +oscar +joan +treat +garcia +harbour +jeremy +##ire +traditions +dominant +jacques +##gon +##wan +relocated +1879 +amendment +sized +companion +simultaneously +volleyball +spun +acre +increases +stopping +loves +belongs +affect +drafted +tossed +scout +battles +1875 +filming +shoved +munich +tenure +vertical +romance +pc +##cher +argue +##ical +craft +ranging +www +opens +honest +tyler +yesterday +virtual +##let +muslims +reveal +snake +immigrants +radical +screaming +speakers +firing +saving +belonging +ease +lighting +prefecture +blame +farmer +hungry +grows +rubbed +beam +sur +subsidiary +##cha +armenian +sao +dropping +conventional +##fer +microsoft +reply +qualify +spots +1867 +sweat +festivals +##ken +immigration +physician +discover +exposure +sandy +explanation +isaac +implemented +##fish +hart +initiated +connect +stakes +presents +heights +householder +pleased +tourist +regardless +slip +closest +##ction +surely +sultan +brings +riley +preparation +aboard +slammed +baptist +experiment +ongoing +interstate +organic +playoffs +##ika +1877 +130 +##tar +hindu +error +tours +tier +plenty +arrangements +talks +trapped +excited +sank +ho +athens +1872 +denver +welfare +suburb +athletes +trick +diverse +belly +exclusively +yelled +1868 +##med +conversion +##ette +1874 +internationally +computers +conductor +abilities +sensitive +hello +dispute +measured +globe +rocket +prices +amsterdam +flights +tigers +inn +municipalities +emotion +references +3d +##mus +explains +airlines +manufactured +pm +archaeological +1873 +interpretation +devon +comment +##ites +settlements +kissing +absolute +improvement +suite +impressed +barcelona +sullivan +jefferson +towers +jesse +julie +##tin +##lu +grandson +hi +gauge +regard +rings +interviews +trace +raymond +thumb +departments +burns +serial +bulgarian +scores +demonstrated +##ix +1866 +kyle +alberta +underneath +romanized +##ward +relieved +acquisition +phrase +cliff +reveals +han +cuts +merger +custom +##dar +nee +gilbert +graduation +##nts +assessment +cafe +difficulty +demands +swung +democrat +jennifer +commons +1940s +grove +##yo +completing +focuses +sum +substitute +bearing +stretch +reception +##py +reflected +essentially +destination +pairs +##ched +survival +resource +##bach +promoting +doubles +messages +tear +##down +##fully +parade +florence 
+harvey +incumbent +partial +framework +900 +pedro +frozen +procedure +olivia +controls +##mic +shelter +personally +temperatures +##od +brisbane +tested +sits +marble +comprehensive +oxygen +leonard +##kov +inaugural +iranian +referring +quarters +attitude +##ivity +mainstream +lined +mars +dakota +norfolk +unsuccessful +##° +explosion +helicopter +congressional +##sing +inspector +bitch +seal +departed +divine +##ters +coaching +examination +punishment +manufacturer +sink +columns +unincorporated +signals +nevada +squeezed +dylan +dining +photos +martial +manuel +eighteen +elevator +brushed +plates +ministers +ivy +congregation +##len +slept +specialized +taxes +curve +restricted +negotiations +likes +statistical +arnold +inspiration +execution +bold +intermediate +significance +margin +ruler +wheels +gothic +intellectual +dependent +listened +eligible +buses +widow +syria +earn +cincinnati +collapsed +recipient +secrets +accessible +philippine +maritime +goddess +clerk +surrender +breaks +playoff +database +##ified +##lon +ideal +beetle +aspect +soap +regulation +strings +expand +anglo +shorter +crosses +retreat +tough +coins +wallace +directions +pressing +##oon +shipping +locomotives +comparison +topics +nephew +##mes +distinction +honors +travelled +sierra +ibn +##over +fortress +sa +recognised +carved +1869 +clients +##dan +intent +##mar +coaches +describing +bread +##ington +beaten +northwestern +##ona +merit +youtube +collapse +challenges +em +historians +objective +submitted +virus +attacking +drake +assume +##ere +diseases +marc +stem +leeds +##cus +##ab +farming +glasses +##lock +visits +nowhere +fellowship +relevant +carries +restaurants +experiments +101 +constantly +bases +targets +shah +tenth +opponents +verse +territorial +##ira +writings +corruption +##hs +instruction +inherited +reverse +emphasis +##vic +employee +arch +keeps +rabbi +watson +payment +uh +##ala +nancy +##tre +venice +fastest +sexy +banned +adrian +properly +ruth +touchdown +dollar +boards +metre +circles +edges +favour +comments +ok +travels +liberation +scattered +firmly +##ular +holland +permitted +diesel +kenya +den +originated +##ral +demons +resumed +dragged +rider +##rus +servant +blinked +extend +torn +##ias +##sey +input +meal +everybody +cylinder +kinds +camps +##fe +bullet +logic +##wn +croatian +evolved +healthy +fool +chocolate +wise +preserve +pradesh +##ess +respective +1850 +##ew +chicken +artificial +gross +corresponding +convicted +cage +caroline +dialogue +##dor +narrative +stranger +mario +br +christianity +failing +trent +commanding +buddhist +1848 +maurice +focusing +yale +bike +altitude +##ering +mouse +revised +##sley +veteran +##ig +pulls +theology +crashed +campaigns +legion +##ability +drag +excellence +customer +cancelled +intensity +excuse +##lar +liga +participating +contributing +printing +##burn +variable +##rk +curious +bin +legacy +renaissance +##my +symptoms +binding +vocalist +dancer +##nie +grammar +gospel +democrats +ya +enters +sc +diplomatic +hitler +##ser +clouds +mathematical +quit +defended +oriented +##heim +fundamental +hardware +impressive +equally +convince +confederate +guilt +chuck +sliding +##ware +magnetic +narrowed +petersburg +bulgaria +otto +phd +skill +##ama +reader +hopes +pitcher +reservoir +hearts +automatically +expecting +mysterious +bennett +extensively +imagined +seeds +monitor +fix +##ative +journalism +struggling +signature +ranch +encounter +photographer +observation +protests +##pin +influences +##hr +calendar +##all +cruz +croatia 
+locomotive +hughes +naturally +shakespeare +basement +hook +uncredited +faded +theories +approaches +dare +phillips +filling +fury +obama +##ain +efficient +arc +deliver +min +raid +breeding +inducted +leagues +efficiency +axis +montana +eagles +##ked +supplied +instructions +karen +picking +indicating +trap +anchor +practically +christians +tomb +vary +occasional +electronics +lords +readers +newcastle +faint +innovation +collect +situations +engagement +160 +claude +mixture +##feld +peer +tissue +logo +lean +##ration +°f +floors +##ven +architects +reducing +##our +##ments +rope +1859 +ottawa +##har +samples +banking +declaration +proteins +resignation +francois +saudi +advocate +exhibited +armor +twins +divorce +##ras +abraham +reviewed +jo +temporarily +matrix +physically +pulse +curled +##ena +difficulties +bengal +usage +##ban +annie +riders +certificate +##pi +holes +warsaw +distinctive +jessica +##mon +mutual +1857 +customs +circular +eugene +removal +loaded +mere +vulnerable +depicted +generations +dame +heir +enormous +lightly +climbing +pitched +lessons +pilots +nepal +ram +google +preparing +brad +louise +renowned +##₂ +liam +##ably +plaza +shaw +sophie +brilliant +bills +##bar +##nik +fucking +mainland +server +pleasant +seized +veterans +jerked +fail +beta +brush +radiation +stored +warmth +southeastern +nate +sin +raced +berkeley +joke +athlete +designation +trunk +##low +roland +qualification +archives +heels +artwork +receives +judicial +reserves +##bed +woke +installation +abu +floating +fake +lesser +excitement +interface +concentrated +addressed +characteristic +amanda +saxophone +monk +auto +##bus +releasing +egg +dies +interaction +defender +ce +outbreak +glory +loving +##bert +sequel +consciousness +http +awake +ski +enrolled +##ress +handling +rookie +brow +somebody +biography +warfare +amounts +contracts +presentation +fabric +dissolved +challenged +meter +psychological +lt +elevated +rally +accurate +##tha +hospitals +undergraduate +specialist +venezuela +exhibit +shed +nursing +protestant +fluid +structural +footage +jared +consistent +prey +##ska +succession +reflect +exile +lebanon +wiped +suspect +shanghai +resting +integration +preservation +marvel +variant +pirates +sheep +rounded +capita +sailing +colonies +manuscript +deemed +variations +clarke +functional +emerging +boxing +relaxed +curse +azerbaijan +heavyweight +nickname +editorial +rang +grid +tightened +earthquake +flashed +miguel +rushing +##ches +improvements +boxes +brooks +180 +consumption +molecular +felix +societies +repeatedly +variation +aids +civic +graphics +professionals +realm +autonomous +receiver +delayed +workshop +militia +chairs +trump +canyon +##point +harsh +extending +lovely +happiness +##jan +stake +eyebrows +embassy +wellington +hannah +##ella +sony +corners +bishops +swear +cloth +contents +xi +namely +commenced +1854 +stanford +nashville +courage +graphic +commitment +garrison +##bin +hamlet +clearing +rebels +attraction +literacy +cooking +ruins +temples +jenny +humanity +celebrate +hasn +freight +sixty +rebel +bastard +##art +newton +##ada +deer +##ges +##ching +smiles +delaware +singers +##ets +approaching +assists +flame +##ph +boulevard +barrel +planted +##ome +pursuit +##sia +consequences +posts +shallow +invitation +rode +depot +ernest +kane +rod +concepts +preston +topic +chambers +striking +blast +arrives +descendants +montgomery +ranges +worlds +##lay +##ari +span +chaos +praise +##ag +fewer +1855 +sanctuary +mud +fbi +##ions +programmes +maintaining +unity +harper 
+bore +handsome +closure +tournaments +thunder +nebraska +linda +facade +puts +satisfied +argentine +dale +cork +dome +panama +##yl +1858 +tasks +experts +##ates +feeding +equation +##las +##ida +##tu +engage +bryan +##ax +um +quartet +melody +disbanded +sheffield +blocked +gasped +delay +kisses +maggie +connects +##non +sts +poured +creator +publishers +##we +guided +ellis +extinct +hug +gaining +##ord +complicated +##bility +poll +clenched +investigate +##use +thereby +quantum +spine +cdp +humor +kills +administered +semifinals +##du +encountered +ignore +##bu +commentary +##maker +bother +roosevelt +140 +plains +halfway +flowing +cultures +crack +imprisoned +neighboring +airline +##ses +##view +##mate +##ec +gather +wolves +marathon +transformed +##ill +cruise +organisations +carol +punch +exhibitions +numbered +alarm +ratings +daddy +silently +##stein +queens +colours +impression +guidance +liu +tactical +##rat +marshal +della +arrow +##ings +rested +feared +tender +owns +bitter +advisor +escort +##ides +spare +farms +grants +##ene +dragons +encourage +colleagues +cameras +##und +sucked +pile +spirits +prague +statements +suspension +landmark +fence +torture +recreation +bags +permanently +survivors +pond +spy +predecessor +bombing +coup +##og +protecting +transformation +glow +##lands +##book +dug +priests +andrea +feat +barn +jumping +##chen +##ologist +##con +casualties +stern +auckland +pipe +serie +revealing +ba +##bel +trevor +mercy +spectrum +yang +consist +governing +collaborated +possessed +epic +comprises +blew +shane +##ack +lopez +honored +magical +sacrifice +judgment +perceived +hammer +mtv +baronet +tune +das +missionary +sheets +350 +neutral +oral +threatening +attractive +shade +aims +seminary +##master +estates +1856 +michel +wounds +refugees +manufacturers +##nic +mercury +syndrome +porter +##iya +##din +hamburg +identification +upstairs +purse +widened +pause +cared +breathed +affiliate +santiago +prevented +celtic +fisher +125 +recruited +byzantine +reconstruction +farther +##mp +diet +sake +au +spite +sensation +##ert +blank +separation +105 +##hon +vladimir +armies +anime +##lie +accommodate +orbit +cult +sofia +archive +##ify +##box +founders +sustained +disorder +honours +northeastern +mia +crops +violet +threats +blanket +fires +canton +followers +southwestern +prototype +voyage +assignment +altered +moderate +protocol +pistol +##eo +questioned +brass +lifting +1852 +math +authored +##ual +doug +dimensional +dynamic +##san +1851 +pronounced +grateful +quest +uncomfortable +boom +presidency +stevens +relating +politicians +chen +barrier +quinn +diana +mosque +tribal +cheese +palmer +portions +sometime +chester +treasure +wu +bend +download +millions +reforms +registration +##osa +consequently +monitoring +ate +preliminary +brandon +invented +ps +eaten +exterior +intervention +ports +documented +log +displays +lecture +sally +favourite +##itz +vermont +lo +invisible +isle +breed +##ator +journalists +relay +speaks +backward +explore +midfielder +actively +stefan +procedures +cannon +blond +kenneth +centered +servants +chains +libraries +malcolm +essex +henri +slavery +##hal +facts +fairy +coached +cassie +cats +washed +cop +##fi +announcement +item +2000s +vinyl +activated +marco +frontier +growled +curriculum +##das +loyal +accomplished +leslie +ritual +kenny +##00 +vii +napoleon +hollow +hybrid +jungle +stationed +friedrich +counted +##ulated +platinum +theatrical +seated +col +rubber +glen +1840 +diversity +healing +extends +id +provisions +administrator 
+columbus +##oe +tributary +te +assured +org +##uous +prestigious +examined +lectures +grammy +ronald +associations +bailey +allan +essays +flute +believing +consultant +proceedings +travelling +1853 +kit +kerala +yugoslavia +buddy +methodist +##ith +burial +centres +batman +##nda +discontinued +bo +dock +stockholm +lungs +severely +##nk +citing +manga +##ugh +steal +mumbai +iraqi +robot +celebrity +bride +broadcasts +abolished +pot +joel +overhead +franz +packed +reconnaissance +johann +acknowledged +introduce +handled +doctorate +developments +drinks +alley +palestine +##nis +##aki +proceeded +recover +bradley +grain +patch +afford +infection +nationalist +legendary +##ath +interchange +virtually +gen +gravity +exploration +amber +vital +wishes +powell +doctrine +elbow +screenplay +##bird +contribute +indonesian +pet +creates +##com +enzyme +kylie +discipline +drops +manila +hunger +##ien +layers +suffer +fever +bits +monica +keyboard +manages +##hood +searched +appeals +##bad +testament +grande +reid +##war +beliefs +congo +##ification +##dia +si +requiring +##via +casey +1849 +regret +streak +rape +depends +syrian +sprint +pound +tourists +upcoming +pub +##xi +tense +##els +practiced +echo +nationwide +guild +motorcycle +liz +##zar +chiefs +desired +elena +bye +precious +absorbed +relatives +booth +pianist +##mal +citizenship +exhausted +wilhelm +##ceae +##hed +noting +quarterback +urge +hectares +##gue +ace +holly +##tal +blonde +davies +parked +sustainable +stepping +twentieth +airfield +galaxy +nest +chip +##nell +tan +shaft +paulo +requirement +##zy +paradise +tobacco +trans +renewed +vietnamese +##cker +##ju +suggesting +catching +holmes +enjoying +md +trips +colt +holder +butterfly +nerve +reformed +cherry +bowling +trailer +carriage +goodbye +appreciate +toy +joshua +interactive +enabled +involve +##kan +collar +determination +bunch +facebook +recall +shorts +superintendent +episcopal +frustration +giovanni +nineteenth +laser +privately +array +circulation +##ovic +armstrong +deals +painful +permit +discrimination +##wi +aires +retiring +cottage +ni +##sta +horizon +ellen +jamaica +ripped +fernando +chapters +playstation +patron +lecturer +navigation +behaviour +genes +georgian +export +solomon +rivals +swift +seventeen +rodriguez +princeton +independently +sox +1847 +arguing +entity +casting +hank +criteria +oakland +geographic +milwaukee +reflection +expanding +conquest +dubbed +##tv +halt +brave +brunswick +doi +arched +curtis +divorced +predominantly +somerset +streams +ugly +zoo +horrible +curved +buenos +fierce +dictionary +vector +theological +unions +handful +stability +chan +punjab +segments +##lly +altar +ignoring +gesture +monsters +pastor +##stone +thighs +unexpected +operators +abruptly +coin +compiled +associates +improving +migration +pin +##ose +compact +collegiate +reserved +##urs +quarterfinals +roster +restore +assembled +hurry +oval +##cies +1846 +flags +martha +##del +victories +sharply +##rated +argues +deadly +neo +drawings +symbols +performer +##iel +griffin +restrictions +editing +andrews +java +journals +arabia +compositions +dee +pierce +removing +hindi +casino +runway +civilians +minds +nasa +hotels +##zation +refuge +rent +retain +potentially +conferences +suburban +conducting +##tto +##tions +##tle +descended +massacre +##cal +ammunition +terrain +fork +souls +counts +chelsea +durham +drives +cab +##bank +perth +realizing +palestinian +finn +simpson +##dal +betty +##ule +moreover +particles +cardinals +tent +evaluation +extraordinary +##oid 
+inscription +##works +wednesday +chloe +maintains +panels +ashley +trucks +##nation +cluster +sunlight +strikes +zhang +##wing +dialect +canon +##ap +tucked +##ws +collecting +##mas +##can +##sville +maker +quoted +evan +franco +aria +buying +cleaning +eva +closet +provision +apollo +clinic +rat +##ez +necessarily +ac +##gle +##ising +venues +flipped +cent +spreading +trustees +checking +authorized +##sco +disappointed +##ado +notion +duration +trumpet +hesitated +topped +brussels +rolls +theoretical +hint +define +aggressive +repeat +wash +peaceful +optical +width +allegedly +mcdonald +strict +copyright +##illa +investors +mar +jam +witnesses +sounding +miranda +michelle +privacy +hugo +harmony +##pp +valid +lynn +glared +nina +102 +headquartered +diving +boarding +gibson +##ncy +albanian +marsh +routine +dealt +enhanced +er +intelligent +substance +targeted +enlisted +discovers +spinning +observations +pissed +smoking +rebecca +capitol +visa +varied +costume +seemingly +indies +compensation +surgeon +thursday +arsenal +westminster +suburbs +rid +anglican +##ridge +knots +foods +alumni +lighter +fraser +whoever +portal +scandal +##ray +gavin +advised +instructor +flooding +terrorist +##ale +teenage +interim +senses +duck +teen +thesis +abby +eager +overcome +##ile +newport +glenn +rises +shame +##cc +prompted +priority +forgot +bomber +nicolas +protective +360 +cartoon +katherine +breeze +lonely +trusted +henderson +richardson +relax +banner +candy +palms +remarkable +##rio +legends +cricketer +essay +ordained +edmund +rifles +trigger +##uri +##away +sail +alert +1830 +audiences +penn +sussex +siblings +pursued +indianapolis +resist +rosa +consequence +succeed +avoided +1845 +##ulation +inland +##tie +##nna +counsel +profession +chronicle +hurried +##una +eyebrow +eventual +bleeding +innovative +cure +##dom +committees +accounting +con +scope +hardy +heather +tenor +gut +herald +codes +tore +scales +wagon +##oo +luxury +tin +prefer +fountain +triangle +bonds +darling +convoy +dried +traced +beings +troy +accidentally +slam +findings +smelled +joey +lawyers +outcome +steep +bosnia +configuration +shifting +toll +brook +performers +lobby +philosophical +construct +shrine +aggregate +boot +cox +phenomenon +savage +insane +solely +reynolds +lifestyle +##ima +nationally +holdings +consideration +enable +edgar +mo +mama +##tein +fights +relegation +chances +atomic +hub +conjunction +awkward +reactions +currency +finale +kumar +underwent +steering +elaborate +gifts +comprising +melissa +veins +reasonable +sunshine +chi +solve +trails +inhabited +elimination +ethics +huh +ana +molly +consent +apartments +layout +marines +##ces +hunters +bulk +##oma +hometown +##wall +##mont +cracked +reads +neighbouring +withdrawn +admission +wingspan +damned +anthology +lancashire +brands +batting +forgive +cuban +awful +##lyn +104 +dimensions +imagination +##ade +dante +##ship +tracking +desperately +goalkeeper +##yne +groaned +workshops +confident +burton +gerald +milton +circus +uncertain +slope +copenhagen +sophia +fog +philosopher +portraits +accent +cycling +varying +gripped +larvae +garrett +specified +scotia +mature +luther +kurt +rap +##kes +aerial +750 +ferdinand +heated +es +transported +##shan +safely +nonetheless +##orn +##gal +motors +demanding +##sburg +startled +##brook +ally +generate +caps +ghana +stained +demo +mentions +beds +ap +afterward +diary +##bling +utility +##iro +richards +1837 +conspiracy +conscious +shining +footsteps +observer +cyprus +urged +loyalty +developer +probability 
+olive +upgraded +gym +miracle +insects +graves +1844 +ourselves +hydrogen +amazon +katie +tickets +poets +##pm +planes +##pan +prevention +witnessed +dense +jin +randy +tang +warehouse +monroe +bang +archived +elderly +investigations +alec +granite +mineral +conflicts +controlling +aboriginal +carlo +##zu +mechanics +stan +stark +rhode +skirt +est +##berry +bombs +respected +##horn +imposed +limestone +deny +nominee +memphis +grabbing +disabled +##als +amusement +aa +frankfurt +corn +referendum +varies +slowed +disk +firms +unconscious +incredible +clue +sue +##zhou +twist +##cio +joins +idaho +chad +developers +computing +destroyer +103 +mortal +tucker +kingston +choices +yu +carson +1800 +os +whitney +geneva +pretend +dimension +staged +plateau +maya +##une +freestyle +##bc +rovers +hiv +##ids +tristan +classroom +prospect +##hus +honestly +diploma +lied +thermal +auxiliary +feast +unlikely +iata +##tel +morocco +pounding +treasury +lithuania +considerably +1841 +dish +1812 +geological +matching +stumbled +destroying +marched +brien +advances +cake +nicole +belle +settling +measuring +directing +##mie +tuesday +bassist +capabilities +stunned +fraud +torpedo +##list +##phone +anton +wisdom +surveillance +ruined +##ulate +lawsuit +healthcare +theorem +halls +trend +aka +horizontal +dozens +acquire +lasting +swim +hawk +gorgeous +fees +vicinity +decrease +adoption +tactics +##ography +pakistani +##ole +draws +##hall +willie +burke +heath +algorithm +integral +powder +elliott +brigadier +jackie +tate +varieties +darker +##cho +lately +cigarette +specimens +adds +##ree +##ensis +##inger +exploded +finalist +cia +murders +wilderness +arguments +nicknamed +acceptance +onwards +manufacture +robertson +jets +tampa +enterprises +blog +loudly +composers +nominations +1838 +ai +malta +inquiry +automobile +hosting +viii +rays +tilted +grief +museums +strategies +furious +euro +equality +cohen +poison +surrey +wireless +governed +ridiculous +moses +##esh +##room +vanished +##ito +barnes +attract +morrison +istanbul +##iness +absent +rotation +petition +janet +##logical +satisfaction +custody +deliberately +observatory +comedian +surfaces +pinyin +novelist +strictly +canterbury +oslo +monks +embrace +ibm +jealous +photograph +continent +dorothy +marina +doc +excess +holden +allegations +explaining +stack +avoiding +lance +storyline +majesty +poorly +spike +dos +bradford +raven +travis +classics +proven +voltage +pillow +fists +butt +1842 +interpreted +##car +1839 +gage +telegraph +lens +promising +expelled +casual +collector +zones +##min +silly +nintendo +##kh +##bra +downstairs +chef +suspicious +afl +flies +vacant +uganda +pregnancy +condemned +lutheran +estimates +cheap +decree +saxon +proximity +stripped +idiot +deposits +contrary +presenter +magnus +glacier +im +offense +edwin +##ori +upright +##long +bolt +##ois +toss +geographical +##izes +environments +delicate +marking +abstract +xavier +nails +windsor +plantation +occurring +equity +saskatchewan +fears +drifted +sequences +vegetation +revolt +##stic +1843 +sooner +fusion +opposing +nato +skating +1836 +secretly +ruin +lease +##oc +edit +##nne +flora +anxiety +ruby +##ological +##mia +tel +bout +taxi +emmy +frost +rainbow +compounds +foundations +rainfall +assassination +nightmare +dominican +##win +achievements +deserve +orlando +intact +armenia +##nte +calgary +valentine +106 +marion +proclaimed +theodore +bells +courtyard +thigh +gonzalez +console +troop +minimal +monte +everyday +##ence +##if +supporter +terrorism +buck +openly 
+presbyterian +activists +carpet +##iers +rubbing +uprising +##yi +cute +conceived +legally +##cht +millennium +cello +velocity +ji +rescued +cardiff +1835 +rex +concentrate +senators +beard +rendered +glowing +battalions +scouts +competitors +sculptor +catalogue +arctic +ion +raja +bicycle +wow +glancing +lawn +##woman +gentleman +lighthouse +publish +predicted +calculated +##val +variants +##gne +strain +##ui +winston +deceased +##nus +touchdowns +brady +caleb +sinking +echoed +crush +hon +blessed +protagonist +hayes +endangered +magnitude +editors +##tine +estimate +responsibilities +##mel +backup +laying +consumed +sealed +zurich +lovers +frustrated +##eau +ahmed +kicking +mit +treasurer +1832 +biblical +refuse +terrified +pump +agrees +genuine +imprisonment +refuses +plymouth +##hen +lou +##nen +tara +trembling +antarctic +ton +learns +##tas +crap +crucial +faction +atop +##borough +wrap +lancaster +odds +hopkins +erik +lyon +##eon +bros +##ode +snap +locality +tips +empress +crowned +cal +acclaimed +chuckled +##ory +clara +sends +mild +towel +##fl +##day +##а +wishing +assuming +interviewed +##bal +##die +interactions +eden +cups +helena +##lf +indie +beck +##fire +batteries +filipino +wizard +parted +##lam +traces +##born +rows +idol +albany +delegates +##ees +##sar +discussions +##ex +notre +instructed +belgrade +highways +suggestion +lauren +possess +orientation +alexandria +abdul +beats +salary +reunion +ludwig +alright +wagner +intimate +pockets +slovenia +hugged +brighton +merchants +cruel +stole +trek +slopes +repairs +enrollment +politically +underlying +promotional +counting +boeing +##bb +isabella +naming +##и +keen +bacteria +listing +separately +belfast +ussr +450 +lithuanian +anybody +ribs +sphere +martinez +cock +embarrassed +proposals +fragments +nationals +##fs +##wski +premises +fin +1500 +alpine +matched +freely +bounded +jace +sleeve +##af +gaming +pier +populated +evident +##like +frances +flooded +##dle +frightened +pour +trainer +framed +visitor +challenging +pig +wickets +##fold +infected +email +##pes +arose +##aw +reward +ecuador +oblast +vale +ch +shuttle +##usa +bach +rankings +forbidden +cornwall +accordance +salem +consumers +bruno +fantastic +toes +machinery +resolved +julius +remembering +propaganda +iceland +bombardment +tide +contacts +wives +##rah +concerto +macdonald +albania +implement +daisy +tapped +sudan +helmet +angela +mistress +##lic +crop +sunk +finest +##craft +hostile +##ute +##tsu +boxer +fr +paths +adjusted +habit +ballot +supervision +soprano +##zen +bullets +wicked +sunset +regiments +disappear +lamp +performs +app +##gia +##oa +rabbit +digging +incidents +entries +##cion +dishes +##oi +introducing +##ati +##fied +freshman +slot +jill +tackles +baroque +backs +##iest +lone +sponsor +destiny +altogether +convert +##aro +consensus +shapes +demonstration +basically +feminist +auction +artifacts +##bing +strongest +twitter +halifax +2019 +allmusic +mighty +smallest +precise +alexandra +viola +##los +##ille +manuscripts +##illo +dancers +ari +managers +monuments +blades +barracks +springfield +maiden +consolidated +electron +##end +berry +airing +wheat +nobel +inclusion +blair +payments +geography +bee +cc +eleanor +react +##hurst +afc +manitoba +##yu +su +lineup +fitness +recreational +investments +airborne +disappointment +##dis +edmonton +viewing +##row +renovation +##cast +infant +bankruptcy +roses +aftermath +pavilion +##yer +carpenter +withdrawal +ladder +##hy +discussing +popped +reliable +agreements +rochester +##abad +curves 
+bombers +220 +rao +reverend +decreased +choosing +107 +stiff +consulting +naples +crawford +tracy +ka +ribbon +cops +##lee +crushed +deciding +unified +teenager +accepting +flagship +explorer +poles +sanchez +inspection +revived +skilled +induced +exchanged +flee +locals +tragedy +swallow +loading +hanna +demonstrate +##ela +salvador +flown +contestants +civilization +##ines +wanna +rhodes +fletcher +hector +knocking +considers +##ough +nash +mechanisms +sensed +mentally +walt +unclear +##eus +renovated +madame +##cks +crews +governmental +##hin +undertaken +monkey +##ben +##ato +fatal +armored +copa +caves +governance +grasp +perception +certification +froze +damp +tugged +wyoming +##rg +##ero +newman +##lor +nerves +curiosity +graph +115 +##ami +withdraw +tunnels +dull +meredith +moss +exhibits +neighbors +communicate +accuracy +explored +raiders +republicans +secular +kat +superman +penny +criticised +##tch +freed +update +conviction +wade +ham +likewise +delegation +gotta +doll +promises +technological +myth +nationality +resolve +convent +##mark +sharon +dig +sip +coordinator +entrepreneur +fold +##dine +capability +councillor +synonym +blown +swan +cursed +1815 +jonas +haired +sofa +canvas +keeper +rivalry +##hart +rapper +speedway +swords +postal +maxwell +estonia +potter +recurring +##nn +##ave +errors +##oni +cognitive +1834 +##² +claws +nadu +roberto +bce +wrestler +ellie +##ations +infinite +ink +##tia +presumably +finite +staircase +108 +noel +patricia +nacional +##cation +chill +eternal +tu +preventing +prussia +fossil +limbs +##logist +ernst +frog +perez +rene +##ace +pizza +prussian +##ios +##vy +molecules +regulatory +answering +opinions +sworn +lengths +supposedly +hypothesis +upward +habitats +seating +ancestors +drank +yield +hd +synthesis +researcher +modest +##var +mothers +peered +voluntary +homeland +##the +acclaim +##igan +static +valve +luxembourg +alto +carroll +fe +receptor +norton +ambulance +##tian +johnston +catholics +depicting +jointly +elephant +gloria +mentor +badge +ahmad +distinguish +remarked +councils +precisely +allison +advancing +detection +crowded +##10 +cooperative +ankle +mercedes +dagger +surrendered +pollution +commit +subway +jeffrey +lesson +sculptures +provider +##fication +membrane +timothy +rectangular +fiscal +heating +teammate +basket +particle +anonymous +deployment +##ple +missiles +courthouse +proportion +shoe +sec +##ller +complaints +forbes +blacks +abandon +remind +sizes +overwhelming +autobiography +natalie +##awa +risks +contestant +countryside +babies +scorer +invaded +enclosed +proceed +hurling +disorders +##cu +reflecting +continuously +cruiser +graduates +freeway +investigated +ore +deserved +maid +blocking +phillip +jorge +shakes +dove +mann +variables +lacked +burden +accompanying +que +consistently +organizing +provisional +complained +endless +##rm +tubes +juice +georges +krishna +mick +labels +thriller +##uch +laps +arcade +sage +snail +##table +shannon +fi +laurence +seoul +vacation +presenting +hire +churchill +surprisingly +prohibited +savannah +technically +##oli +170 +##lessly +testimony +suited +speeds +toys +romans +mlb +flowering +measurement +talented +kay +settings +charleston +expectations +shattered +achieving +triumph +ceremonies +portsmouth +lanes +mandatory +loser +stretching +cologne +realizes +seventy +cornell +careers +webb +##ulating +americas +budapest +ava +suspicion +##ison +yo +conrad +##hai +sterling +jessie +rector +##az +1831 +transform +organize +loans +christine +volcanic +warrant +slender 
+summers +subfamily +newer +danced +dynamics +rhine +proceeds +heinrich +gastropod +commands +sings +facilitate +easter +ra +positioned +responses +expense +fruits +yanked +imported +25th +velvet +vic +primitive +tribune +baldwin +neighbourhood +donna +rip +hay +pr +##uro +1814 +espn +welcomed +##aria +qualifier +glare +highland +timing +##cted +shells +eased +geometry +louder +exciting +slovakia +##sion +##iz +##lot +savings +prairie +##ques +marching +rafael +tonnes +##lled +curtain +preceding +shy +heal +greene +worthy +##pot +detachment +bury +sherman +##eck +reinforced +seeks +bottles +contracted +duchess +outfit +walsh +##sc +mickey +##ase +geoffrey +archer +squeeze +dawson +eliminate +invention +##enberg +neal +##eth +stance +dealer +coral +maple +retire +polo +simplified +##ht +1833 +hid +watts +backwards +jules +##oke +genesis +mt +frames +rebounds +burma +woodland +moist +santos +whispers +drained +subspecies +##aa +streaming +ulster +burnt +correspondence +maternal +gerard +denis +stealing +##load +genius +duchy +##oria +inaugurated +momentum +suits +placement +sovereign +clause +thames +##hara +confederation +reservation +sketch +yankees +lets +rotten +charm +hal +verses +ultra +commercially +dot +salon +citation +adopt +winnipeg +mist +allocated +cairo +##boy +jenkins +interference +objectives +##wind +1820 +portfolio +armoured +sectors +##eh +initiatives +##world +integrity +exercises +robe +tap +ab +gazed +##tones +distracted +rulers +111 +favorable +jerome +tended +cart +factories +##eri +diplomat +valued +gravel +charitable +##try +calvin +exploring +chang +shepherd +terrace +pdf +pupil +##ural +reflects +ups +##rch +governors +shelf +depths +##nberg +trailed +crest +tackle +##nian +##ats +hatred +##kai +clare +makers +ethiopia +longtime +detected +embedded +lacking +slapped +rely +thomson +anticipation +iso +morton +successive +agnes +screenwriter +straightened +philippe +playwright +haunted +licence +iris +intentions +sutton +112 +logical +correctly +##weight +branded +licked +tipped +silva +ricky +narrator +requests +##ents +greeted +supernatural +cow +##wald +lung +refusing +employer +strait +gaelic +liner +##piece +zoe +sabha +##mba +driveway +harvest +prints +bates +reluctantly +threshold +algebra +ira +wherever +coupled +240 +assumption +picks +##air +designers +raids +gentlemen +##ean +roller +blowing +leipzig +locks +screw +dressing +strand +##lings +scar +dwarf +depicts +##nu +nods +##mine +differ +boris +##eur +yuan +flip +##gie +mob +invested +questioning +applying +##ture +shout +##sel +gameplay +blamed +illustrations +bothered +weakness +rehabilitation +##of +##zes +envelope +rumors +miners +leicester +subtle +kerry +##ico +ferguson +##fu +premiership +ne +##cat +bengali +prof +catches +remnants +dana +##rily +shouting +presidents +baltic +ought +ghosts +dances +sailors +shirley +fancy +dominic +##bie +madonna +##rick +bark +buttons +gymnasium +ashes +liver +toby +oath +providence +doyle +evangelical +nixon +cement +carnegie +embarked +hatch +surroundings +guarantee +needing +pirate +essence +##bee +filter +crane +hammond +projected +immune +percy +twelfth +##ult +regent +doctoral +damon +mikhail +##ichi +lu +critically +elect +realised +abortion +acute +screening +mythology +steadily +##fc +frown +nottingham +kirk +wa +minneapolis +##rra +module +algeria +mc +nautical +encounters +surprising +statues +availability +shirts +pie +alma +brows +munster +mack +soup +crater +tornado +sanskrit +cedar +explosive +bordered +dixon +planets +stamp +exam +happily 
+##bble +carriers +kidnapped +##vis +accommodation +emigrated +##met +knockout +correspondent +violation +profits +peaks +lang +specimen +agenda +ancestry +pottery +spelling +equations +obtaining +ki +linking +1825 +debris +asylum +##20 +buddhism +teddy +##ants +gazette +##nger +##sse +dental +eligibility +utc +fathers +averaged +zimbabwe +francesco +coloured +hissed +translator +lynch +mandate +humanities +mackenzie +uniforms +lin +##iana +##gio +asset +mhz +fitting +samantha +genera +wei +rim +beloved +shark +riot +entities +expressions +indo +carmen +slipping +owing +abbot +neighbor +sidney +##av +rats +recommendations +encouraging +squadrons +anticipated +commanders +conquered +##oto +donations +diagnosed +##mond +divide +##iva +guessed +decoration +vernon +auditorium +revelation +conversations +##kers +##power +herzegovina +dash +alike +protested +lateral +herman +accredited +mg +##gent +freeman +mel +fiji +crow +crimson +##rine +livestock +##pped +humanitarian +bored +oz +whip +##lene +##ali +legitimate +alter +grinning +spelled +anxious +oriental +wesley +##nin +##hole +carnival +controller +detect +##ssa +bowed +educator +kosovo +macedonia +##sin +occupy +mastering +stephanie +janeiro +para +unaware +nurses +noon +135 +cam +hopefully +ranger +combine +sociology +polar +rica +##eer +neill +##sman +holocaust +##ip +doubled +lust +1828 +109 +decent +cooling +unveiled +##card +1829 +nsw +homer +chapman +meyer +##gin +dive +mae +reagan +expertise +##gled +darwin +brooke +sided +prosecution +investigating +comprised +petroleum +genres +reluctant +differently +trilogy +johns +vegetables +corpse +highlighted +lounge +pension +unsuccessfully +elegant +aided +ivory +beatles +amelia +cain +dubai +sunny +immigrant +babe +click +##nder +underwater +pepper +combining +mumbled +atlas +horns +accessed +ballad +physicians +homeless +gestured +rpm +freak +louisville +corporations +patriots +prizes +rational +warn +modes +decorative +overnight +din +troubled +phantom +##ort +monarch +sheer +##dorf +generals +guidelines +organs +addresses +##zon +enhance +curling +parishes +cord +##kie +linux +caesar +deutsche +bavaria +##bia +coleman +cyclone +##eria +bacon +petty +##yama +##old +hampton +diagnosis +1824 +throws +complexity +rita +disputed +##₃ +pablo +##sch +marketed +trafficking +##ulus +examine +plague +formats +##oh +vault +faithful +##bourne +webster +##ox +highlights +##ient +##ann +phones +vacuum +sandwich +modeling +##gated +bolivia +clergy +qualities +isabel +##nas +##ars +wears +screams +reunited +annoyed +bra +##ancy +##rate +differential +transmitter +tattoo +container +poker +##och +excessive +resides +cowboys +##tum +augustus +trash +providers +statute +retreated +balcony +reversed +void +storey +preceded +masses +leap +laughs +neighborhoods +wards +schemes +falcon +santo +battlefield +pad +ronnie +thread +lesbian +venus +##dian +beg +sandstone +daylight +punched +gwen +analog +stroked +wwe +acceptable +measurements +dec +toxic +##kel +adequate +surgical +economist +parameters +varsity +##sberg +quantity +ella +##chy +##rton +countess +generating +precision +diamonds +expressway +ga +##ı +1821 +uruguay +talents +galleries +expenses +scanned +colleague +outlets +ryder +lucien +##ila +paramount +##bon +syracuse +dim +fangs +gown +sweep +##sie +toyota +missionaries +websites +##nsis +sentences +adviser +val +trademark +spells +##plane +patience +starter +slim +##borg +toe +incredibly +shoots +elliot +nobility +##wyn +cowboy +endorsed +gardner +tendency +persuaded +organisms +emissions 
+kazakhstan +amused +boring +chips +themed +##hand +llc +constantinople +chasing +systematic +guatemala +borrowed +erin +carey +##hard +highlands +struggles +1810 +##ifying +##ced +wong +exceptions +develops +enlarged +kindergarten +castro +##ern +##rina +leigh +zombie +juvenile +##most +consul +##nar +sailor +hyde +clarence +intensive +pinned +nasty +useless +jung +clayton +stuffed +exceptional +ix +apostolic +230 +transactions +##dge +exempt +swinging +cove +religions +##ash +shields +dairy +bypass +190 +pursuing +bug +joyce +bombay +chassis +southampton +chat +interact +redesignated +##pen +nascar +pray +salmon +rigid +regained +malaysian +grim +publicity +constituted +capturing +toilet +delegate +purely +tray +drift +loosely +striker +weakened +trinidad +mitch +itv +defines +transmitted +ming +scarlet +nodding +fitzgerald +fu +narrowly +sp +tooth +standings +virtue +##₁ +##wara +##cting +chateau +gloves +lid +##nel +hurting +conservatory +##pel +sinclair +reopened +sympathy +nigerian +strode +advocated +optional +chronic +discharge +##rc +suck +compatible +laurel +stella +shi +fails +wage +dodge +128 +informal +sorts +levi +buddha +villagers +##aka +chronicles +heavier +summoned +gateway +3000 +eleventh +jewelry +translations +accordingly +seas +##ency +fiber +pyramid +cubic +dragging +##ista +caring +##ops +android +contacted +lunar +##dt +kai +lisbon +patted +1826 +sacramento +theft +madagascar +subtropical +disputes +ta +holidays +piper +willow +mare +cane +itunes +newfoundland +benny +companions +dong +raj +observe +roar +charming +plaque +tibetan +fossils +enacted +manning +bubble +tina +tanzania +##eda +##hir +funk +swamp +deputies +cloak +ufc +scenario +par +scratch +metals +anthem +guru +engaging +specially +##boat +dialects +nineteen +cecil +duet +disability +messenger +unofficial +##lies +defunct +eds +moonlight +drainage +surname +puzzle +honda +switching +conservatives +mammals +knox +broadcaster +sidewalk +cope +##ried +benson +princes +peterson +##sal +bedford +sharks +eli +wreck +alberto +gasp +archaeology +lgbt +teaches +securities +madness +compromise +waving +coordination +davidson +visions +leased +possibilities +eighty +jun +fernandez +enthusiasm +assassin +sponsorship +reviewer +kingdoms +estonian +laboratories +##fy +##nal +applies +verb +celebrations +##zzo +rowing +lightweight +sadness +submit +mvp +balanced +dude +##vas +explicitly +metric +magnificent +mound +brett +mohammad +mistakes +irregular +##hing +##ass +sanders +betrayed +shipped +surge +##enburg +reporters +termed +georg +pity +verbal +bulls +abbreviated +enabling +appealed +##are +##atic +sicily +sting +heel +sweetheart +bart +spacecraft +brutal +monarchy +##tter +aberdeen +cameo +diane +##ub +survivor +clyde +##aries +complaint +##makers +clarinet +delicious +chilean +karnataka +coordinates +1818 +panties +##rst +pretending +ar +dramatically +kiev +bella +tends +distances +113 +catalog +launching +instances +telecommunications +portable +lindsay +vatican +##eim +angles +aliens +marker +stint +screens +bolton +##rne +judy +wool +benedict +plasma +europa +spark +imaging +filmmaker +swiftly +##een +contributor +##nor +opted +stamps +apologize +financing +butter +gideon +sophisticated +alignment +avery +chemicals +yearly +speculation +prominence +professionally +##ils +immortal +institutional +inception +wrists +identifying +tribunal +derives +gains +##wo +papal +preference +linguistic +vince +operative +brewery +##ont +unemployment +boyd +##ured +##outs +albeit +prophet +1813 +bi +##rr +##face +##rad 
+quarterly +asteroid +cleaned +radius +temper +##llen +telugu +jerk +viscount +menu +##ote +glimpse +##aya +yacht +hawaiian +baden +##rl +laptop +readily +##gu +monetary +offshore +scots +watches +##yang +##arian +upgrade +needle +xbox +lea +encyclopedia +flank +fingertips +##pus +delight +teachings +confirm +roth +beaches +midway +winters +##iah +teasing +daytime +beverly +gambling +bonnie +##backs +regulated +clement +hermann +tricks +knot +##shing +##uring +##vre +detached +ecological +owed +specialty +byron +inventor +bats +stays +screened +unesco +midland +trim +affection +##ander +##rry +jess +thoroughly +feedback +##uma +chennai +strained +heartbeat +wrapping +overtime +pleaded +##sworth +mon +leisure +oclc +##tate +##ele +feathers +angelo +thirds +nuts +surveys +clever +gill +commentator +##dos +darren +rides +gibraltar +##nc +##mu +dissolution +dedication +shin +meals +saddle +elvis +reds +chaired +taller +appreciation +functioning +niece +favored +advocacy +robbie +criminals +suffolk +yugoslav +passport +constable +congressman +hastings +vera +##rov +consecrated +sparks +ecclesiastical +confined +##ovich +muller +floyd +nora +1822 +paved +1827 +cumberland +ned +saga +spiral +##flow +appreciated +yi +collaborative +treating +similarities +feminine +finishes +##ib +jade +import +##nse +##hot +champagne +mice +securing +celebrities +helsinki +attributes +##gos +cousins +phases +ache +lucia +gandhi +submission +vicar +spear +shine +tasmania +biting +detention +constitute +tighter +seasonal +##gus +terrestrial +matthews +##oka +effectiveness +parody +philharmonic +##onic +1816 +strangers +encoded +consortium +guaranteed +regards +shifts +tortured +collision +supervisor +inform +broader +insight +theaters +armour +emeritus +blink +incorporates +mapping +##50 +##ein +handball +flexible +##nta +substantially +generous +thief +##own +carr +loses +1793 +prose +ucla +romeo +generic +metallic +realization +damages +mk +commissioners +zach +default +##ther +helicopters +lengthy +stems +spa +partnered +spectators +rogue +indication +penalties +teresa +1801 +sen +##tric +dalton +##wich +irving +photographic +##vey +dell +deaf +peters +excluded +unsure +##vable +patterson +crawled +##zio +resided +whipped +latvia +slower +ecole +pipes +employers +maharashtra +comparable +va +textile +pageant +##gel +alphabet +binary +irrigation +chartered +choked +antoine +offs +waking +supplement +##wen +quantities +demolition +regain +locate +urdu +folks +alt +114 +##mc +scary +andreas +whites +##ava +classrooms +mw +aesthetic +publishes +valleys +guides +cubs +johannes +bryant +conventions +affecting +##itt +drain +awesome +isolation +prosecutor +ambitious +apology +captive +downs +atmospheric +lorenzo +aisle +beef +foul +##onia +kidding +composite +disturbed +illusion +natives +##ffer +emi +rockets +riverside +wartime +painters +adolf +melted +##ail +uncertainty +simulation +hawks +progressed +meantime +builder +spray +breach +unhappy +regina +russians +##urg +determining +##tation +tram +1806 +##quin +aging +##12 +1823 +garion +rented +mister +diaz +terminated +clip +1817 +depend +nervously +disco +owe +defenders +shiva +notorious +disbelief +shiny +worcester +##gation +##yr +trailing +undertook +islander +belarus +limitations +watershed +fuller +overlooking +utilized +raphael +1819 +synthetic +breakdown +klein +##nate +moaned +memoir +lamb +practicing +##erly +cellular +arrows +exotic +##graphy +witches +117 +charted +rey +hut +hierarchy +subdivision +freshwater +giuseppe +aloud +reyes +qatar +marty 
+sideways +utterly +sexually +jude +prayers +mccarthy +softball +blend +damien +##gging +##metric +wholly +erupted +lebanese +negro +revenues +tasted +comparative +teamed +transaction +labeled +maori +sovereignty +parkway +trauma +gran +malay +121 +advancement +descendant +2020 +buzz +salvation +inventory +symbolic +##making +antarctica +mps +##gas +##bro +mohammed +myanmar +holt +submarines +tones +##lman +locker +patriarch +bangkok +emerson +remarks +predators +kin +afghan +confession +norwich +rental +emerge +advantages +##zel +rca +##hold +shortened +storms +aidan +##matic +autonomy +compliance +##quet +dudley +atp +##osis +1803 +motto +documentation +summary +professors +spectacular +christina +archdiocese +flashing +innocence +remake +##dell +psychic +reef +scare +employ +rs +sticks +meg +gus +leans +##ude +accompany +bergen +tomas +##iko +doom +wages +pools +##nch +##bes +breasts +scholarly +alison +outline +brittany +breakthrough +willis +realistic +##cut +##boro +competitor +##stan +pike +picnic +icon +designing +commercials +washing +villain +skiing +micro +costumes +auburn +halted +executives +##hat +logistics +cycles +vowel +applicable +barrett +exclaimed +eurovision +eternity +ramon +##umi +##lls +modifications +sweeping +disgust +##uck +torch +aviv +ensuring +rude +dusty +sonic +donovan +outskirts +cu +pathway +##band +##gun +##lines +disciplines +acids +cadet +paired +##40 +sketches +##sive +marriages +##⁺ +folding +peers +slovak +implies +admired +##beck +1880s +leopold +instinct +attained +weston +megan +horace +##ination +dorsal +ingredients +evolutionary +##its +complications +deity +lethal +brushing +levy +deserted +institutes +posthumously +delivering +telescope +coronation +motivated +rapids +luc +flicked +pays +volcano +tanner +weighed +##nica +crowds +frankie +gifted +addressing +granddaughter +winding +##rna +constantine +gomez +##front +landscapes +rudolf +anthropology +slate +werewolf +##lio +astronomy +circa +rouge +dreaming +sack +knelt +drowned +naomi +prolific +tracked +freezing +herb +##dium +agony +randall +twisting +wendy +deposit +touches +vein +wheeler +##bbled +##bor +batted +retaining +tire +presently +compare +specification +daemon +nigel +##grave +merry +recommendation +czechoslovakia +sandra +ng +roma +##sts +lambert +inheritance +sheikh +winchester +cries +examining +##yle +comeback +cuisine +nave +##iv +ko +retrieve +tomatoes +barker +polished +defining +irene +lantern +personalities +begging +tract +swore +1809 +175 +##gic +omaha +brotherhood +##rley +haiti +##ots +exeter +##ete +##zia +steele +dumb +pearson +210 +surveyed +elisabeth +trends +##ef +fritz +##rf +premium +bugs +fraction +calmly +viking +##birds +tug +inserted +unusually +##ield +confronted +distress +crashing +brent +turks +resign +##olo +cambodia +gabe +sauce +##kal +evelyn +116 +extant +clusters +quarry +teenagers +luna +##lers +##ister +affiliation +drill +##ashi +panthers +scenic +libya +anita +strengthen +inscriptions +##cated +lace +sued +judith +riots +##uted +mint +##eta +preparations +midst +dub +challenger +##vich +mock +cf +displaced +wicket +breaths +enables +schmidt +analyst +##lum +ag +highlight +automotive +axe +josef +newark +sufficiently +resembles +50th +##pal +flushed +mum +traits +##ante +commodore +incomplete +warming +titular +ceremonial +ethical +118 +celebrating +eighteenth +cao +lima +medalist +mobility +strips +snakes +##city +miniature +zagreb +barton +escapes +umbrella +automated +doubted +differs +cooled +georgetown +dresden +cooked +fade +wyatt +rna 
+jacobs +carlton +abundant +stereo +boost +madras +inning +##hia +spur +ip +malayalam +begged +osaka +groan +escaping +charging +dose +vista +##aj +bud +papa +communists +advocates +edged +tri +##cent +resemble +peaking +necklace +fried +montenegro +saxony +goose +glances +stuttgart +curator +recruit +grocery +sympathetic +##tting +##fort +127 +lotus +randolph +ancestor +##rand +succeeding +jupiter +1798 +macedonian +##heads +hiking +1808 +handing +fischer +##itive +garbage +node +##pies +prone +singular +papua +inclined +attractions +italia +pouring +motioned +grandma +garnered +jacksonville +corp +ego +ringing +aluminum +##hausen +ordering +##foot +drawer +traders +synagogue +##play +##kawa +resistant +wandering +fragile +fiona +teased +var +hardcore +soaked +jubilee +decisive +exposition +mercer +poster +valencia +hale +kuwait +1811 +##ises +##wr +##eed +tavern +gamma +122 +johan +##uer +airways +amino +gil +##ury +vocational +domains +torres +##sp +generator +folklore +outcomes +##keeper +canberra +shooter +fl +beams +confrontation +##lling +##gram +feb +aligned +forestry +pipeline +jax +motorway +conception +decay +##tos +coffin +##cott +stalin +1805 +escorted +minded +##nam +sitcom +purchasing +twilight +veronica +additions +passive +tensions +straw +123 +frequencies +1804 +refugee +cultivation +##iate +christie +clary +bulletin +crept +disposal +##rich +##zong +processor +crescent +##rol +bmw +emphasized +whale +nazis +aurora +##eng +dwelling +hauled +sponsors +toledo +mega +ideology +theatres +tessa +cerambycidae +saves +turtle +cone +suspects +kara +rusty +yelling +greeks +mozart +shades +cocked +participant +##tro +shire +spit +freeze +necessity +##cos +inmates +nielsen +councillors +loaned +uncommon +omar +peasants +botanical +offspring +daniels +formations +jokes +1794 +pioneers +sigma +licensing +##sus +wheelchair +polite +1807 +liquor +pratt +trustee +##uta +forewings +balloon +##zz +kilometre +camping +explicit +casually +shawn +foolish +teammates +nm +hassan +carrie +judged +satisfy +vanessa +knives +selective +cnn +flowed +##lice +eclipse +stressed +eliza +mathematician +cease +cultivated +##roy +commissions +browns +##ania +destroyers +sheridan +meadow +##rius +minerals +##cial +downstream +clash +gram +memoirs +ventures +baha +seymour +archie +midlands +edith +fare +flynn +invite +canceled +tiles +stabbed +boulder +incorporate +amended +camden +facial +mollusk +unreleased +descriptions +yoga +grabs +550 +raises +ramp +shiver +##rose +coined +pioneering +tunes +qing +warwick +tops +119 +melanie +giles +##rous +wandered +##inal +annexed +nov +30th +unnamed +##ished +organizational +airplane +normandy +stoke +whistle +blessing +violations +chased +holders +shotgun +##ctic +outlet +reactor +##vik +tires +tearing +shores +fortified +mascot +constituencies +nc +columnist +productive +tibet +##rta +lineage +hooked +oct +tapes +judging +cody +##gger +hansen +kashmir +triggered +##eva +solved +cliffs +##tree +resisted +anatomy +protesters +transparent +implied +##iga +injection +mattress +excluding +##mbo +defenses +helpless +devotion +##elli +growl +liberals +weber +phenomena +atoms +plug +##iff +mortality +apprentice +howe +convincing +aaa +swimmer +barber +leone +promptly +sodium +def +nowadays +arise +##oning +gloucester +corrected +dignity +norm +erie +##ders +elders +evacuated +sylvia +compression +##yar +hartford +pose +backpack +reasoning +accepts +24th +wipe +millimetres +marcel +##oda +dodgers +albion +1790 +overwhelmed +aerospace +oaks +1795 +showcase +acknowledge 
+recovering +nolan +ashe +hurts +geology +fashioned +disappearance +farewell +swollen +shrug +marquis +wimbledon +124 +rue +1792 +commemorate +reduces +experiencing +inevitable +calcutta +intel +##court +murderer +sticking +fisheries +imagery +bloom +280 +brake +##inus +gustav +hesitation +memorable +po +viral +beans +accidents +tunisia +antenna +spilled +consort +treatments +aye +perimeter +##gard +donation +hostage +migrated +banker +addiction +apex +lil +trout +##ously +conscience +##nova +rams +sands +genome +passionate +troubles +##lets +##set +amid +##ibility +##ret +higgins +exceed +vikings +##vie +payne +##zan +muscular +##ste +defendant +sucking +##wal +ibrahim +fuselage +claudia +vfl +europeans +snails +interval +##garh +preparatory +statewide +tasked +lacrosse +viktor +##lation +angola +##hra +flint +implications +employs +teens +patrons +stall +weekends +barriers +scrambled +nucleus +tehran +jenna +parsons +lifelong +robots +displacement +5000 +##bles +precipitation +##gt +knuckles +clutched +1802 +marrying +ecology +marx +accusations +declare +scars +kolkata +mat +meadows +bermuda +skeleton +finalists +vintage +crawl +coordinate +affects +subjected +orchestral +mistaken +##tc +mirrors +dipped +relied +260 +arches +candle +##nick +incorporating +wildly +fond +basilica +owl +fringe +rituals +whispering +stirred +feud +tertiary +slick +goat +honorable +whereby +skip +ricardo +stripes +parachute +adjoining +submerged +synthesizer +##gren +intend +positively +ninety +phi +beaver +partition +fellows +alexis +prohibition +carlisle +bizarre +fraternity +##bre +doubts +icy +cbc +aquatic +sneak +sonny +combines +airports +crude +supervised +spatial +merge +alfonso +##bic +corrupt +scan +undergo +##ams +disabilities +colombian +comparing +dolphins +perkins +##lish +reprinted +unanimous +bounced +hairs +underworld +midwest +semester +bucket +paperback +miniseries +coventry +demise +##leigh +demonstrations +sensor +rotating +yan +##hler +arrange +soils +##idge +hyderabad +labs +##dr +brakes +grandchildren +##nde +negotiated +rover +ferrari +continuation +directorate +augusta +stevenson +counterpart +gore +##rda +nursery +rican +ave +collectively +broadly +pastoral +repertoire +asserted +discovering +nordic +styled +fiba +cunningham +harley +middlesex +survives +tumor +tempo +zack +aiming +lok +urgent +##rade +##nto +devils +##ement +contractor +turin +##wl +##ool +bliss +repaired +simmons +moan +astronomical +cr +negotiate +lyric +1890s +lara +bred +clad +angus +pbs +##ience +engineered +posed +##lk +hernandez +possessions +elbows +psychiatric +strokes +confluence +electorate +lifts +campuses +lava +alps +##ep +##ution +##date +physicist +woody +##page +##ographic +##itis +juliet +reformation +sparhawk +320 +complement +suppressed +jewel +##½ +floated +##kas +continuity +sadly +##ische +inability +melting +scanning +paula +flour +judaism +safer +vague +##lm +solving +curb +##stown +financially +gable +bees +expired +miserable +cassidy +dominion +1789 +cupped +145 +robbery +facto +amos +warden +resume +tallest +marvin +ing +pounded +usd +declaring +gasoline +##aux +darkened +270 +650 +sophomore +##mere +erection +gossip +televised +risen +dial +##eu +pillars +##link +passages +profound +##tina +arabian +ashton +silicon +nail +##ead +##lated +##wer +##hardt +fleming +firearms +ducked +circuits +blows +waterloo +titans +##lina +atom +fireplace +cheshire +financed +activation +algorithms +##zzi +constituent +catcher +cherokee +partnerships +sexuality +platoon +tragic +vivian +guarded +whiskey 
+meditation
+poetic
+##late
+##nga
+##ake
+porto
+listeners
+dominance
+kendra
+mona
[... several thousand further vocabulary entries in this hunk, one `+token` per line; subword continuations carry the `##` prefix (e.g. `##lal`, `##ght`, `##uda`); entries include numerals, years, and non-ASCII tokens such as `m³`, `¹⁄₂`, `co₂` ...]
+administer
+johanna
+##imate +gel +suspiciously +1767 +sobs +##dington +backbone +hayward +garry +##folding +##nesia +maxi +##oof +##ppe +ellison +galileo +##stand +crimea +frenzy +amour +bumper +matrices +natalia +baking +garth +palestinians +##grove +smack +conveyed +ensembles +gardening +##manship +##rup +##stituting +1640 +harvesting +topography +jing +shifters +dormitory +##carriage +##lston +ist +skulls +##stadt +dolores +jewellery +sarawak +##wai +##zier +fences +christy +confinement +tumbling +credibility +fir +stench +##bria +##plication +##nged +##sam +virtues +##belt +marjorie +pba +##eem +##made +celebrates +schooner +agitated +barley +fulfilling +anthropologist +##pro +restrict +novi +regulating +##nent +padres +##rani +##hesive +loyola +tabitha +milky +olson +proprietor +crambidae +guarantees +intercollegiate +ljubljana +hilda +##sko +ignorant +hooded +##lts +sardinia +##lidae +##vation +frontman +privileged +witchcraft +##gp +jammed +laude +poking +##than +bracket +amazement +yunnan +##erus +maharaja +linnaeus +264 +commissioning +milano +peacefully +##logies +akira +rani +regulator +##36 +grasses +##rance +luzon +crows +compiler +gretchen +seaman +edouard +tab +buccaneers +ellington +hamlets +whig +socialists +##anto +directorial +easton +mythological +##kr +##vary +rhineland +semantic +taut +dune +inventions +succeeds +##iter +replication +branched +##pired +jul +prosecuted +kangaroo +penetrated +##avian +middlesbrough +doses +bleak +madam +predatory +relentless +##vili +reluctance +##vir +hailey +crore +silvery +1759 +monstrous +swimmers +transmissions +hawthorn +informing +##eral +toilets +caracas +crouch +kb +##sett +295 +cartel +hadley +##aling +alexia +yvonne +##biology +cinderella +eton +superb +blizzard +stabbing +industrialist +maximus +##gm +##orus +groves +maud +clade +oversized +comedic +##bella +rosen +nomadic +fulham +montane +beverages +galaxies +redundant +swarm +##rot +##folia +##llis +buckinghamshire +fen +bearings +bahadur +##rom +gilles +phased +dynamite +faber +benoit +vip +##ount +##wd +booking +fractured +tailored +anya +spices +westwood +cairns +auditions +inflammation +steamed +##rocity +##acion +##urne +skyla +thereof +watford +torment +archdeacon +transforms +lulu +demeanor +fucked +serge +##sor +mckenna +minas +entertainer +##icide +caress +originate +residue +##sty +1740 +##ilised +##org +beech +##wana +subsidies +##ghton +emptied +gladstone +ru +firefighters +voodoo +##rcle +het +nightingale +tamara +edmond +ingredient +weaknesses +silhouette +285 +compatibility +withdrawing +hampson +##mona +anguish +giggling +##mber +bookstore +##jiang +southernmost +tilting +##vance +bai +economical +rf +briefcase +dreadful +hinted +projections +shattering +totaling +##rogate +analogue +indicted +periodical +fullback +##dman +haynes +##tenberg +##ffs +##ishment +1745 +thirst +stumble +penang +vigorous +##ddling +##kor +##lium +octave +##ove +##enstein +##inen +##ones +siberian +##uti +cbn +repeal +swaying +##vington +khalid +tanaka +unicorn +otago +plastered +lobe +riddle +##rella +perch +##ishing +croydon +filtered +graeme +tripoli +##ossa +crocodile +##chers +sufi +mined +##tung +inferno +lsu +##phi +swelled +utilizes +£2 +cale +periodicals +styx +hike +informally +coop +lund +##tidae +ala +hen +qui +transformations +disposed +sheath +chickens +##cade +fitzroy +sas +silesia +unacceptable +odisha +1650 +sabrina +pe +spokane +ratios +athena +massage +shen +dilemma +##drum +##riz +##hul +corona +doubtful +niall +##pha +##bino +fines +cite +acknowledging +bangor +ballard 
+bathurst +##resh +huron +mustered +alzheimer +garments +kinase +tyre +warship +##cp +flashback +pulmonary +braun +cheat +kamal +cyclists +constructions +grenades +ndp +traveller +excuses +stomped +signalling +trimmed +futsal +mosques +relevance +##wine +wta +##23 +##vah +##lter +hoc +##riding +optimistic +##´s +deco +sim +interacting +rejecting +moniker +waterways +##ieri +##oku +mayors +gdansk +outnumbered +pearls +##ended +##hampton +fairs +totals +dominating +262 +notions +stairway +compiling +pursed +commodities +grease +yeast +##jong +carthage +griffiths +residual +amc +contraction +laird +sapphire +##marine +##ivated +amalgamation +dissolve +inclination +lyle +packaged +altitudes +suez +canons +graded +lurched +narrowing +boasts +guise +wed +enrico +##ovsky +rower +scarred +bree +cub +iberian +protagonists +bargaining +proposing +trainers +voyages +vans +fishes +##aea +##ivist +##verance +encryption +artworks +kazan +sabre +cleopatra +hepburn +rotting +supremacy +mecklenburg +##brate +burrows +hazards +outgoing +flair +organizes +##ctions +scorpion +##usions +boo +234 +chevalier +dunedin +slapping +##34 +ineligible +pensions +##38 +##omic +manufactures +emails +bismarck +238 +weakening +blackish +ding +mcgee +quo +##rling +northernmost +xx +manpower +greed +sampson +clicking +##ange +##horpe +##inations +##roving +torre +##eptive +##moral +symbolism +38th +asshole +meritorious +outfits +splashed +biographies +sprung +astros +##tale +302 +737 +filly +raoul +nw +tokugawa +linden +clubhouse +##apa +tracts +romano +##pio +putin +tags +##note +chained +dickson +gunshot +moe +gunn +rashid +##tails +zipper +##bas +##nea +contrasted +##ply +##udes +plum +pharaoh +##pile +aw +comedies +ingrid +sandwiches +subdivisions +1100 +mariana +nokia +kamen +hz +delaney +veto +herring +##words +possessive +outlines +##roup +siemens +stairwell +rc +gallantry +messiah +palais +yells +233 +zeppelin +##dm +bolivar +##cede +smackdown +mckinley +##mora +##yt +muted +geologic +finely +unitary +avatar +hamas +maynard +rees +bog +contrasting +##rut +liv +chico +disposition +pixel +##erate +becca +dmitry +yeshiva +narratives +##lva +##ulton +mercenary +sharpe +tempered +navigate +stealth +amassed +keynes +##lini +untouched +##rrie +havoc +lithium +##fighting +abyss +graf +southward +wolverine +balloons +implements +ngos +transitions +##icum +ambushed +concacaf +dormant +economists +##dim +costing +csi +rana +universite +boulders +verity +##llon +collin +mellon +misses +cypress +fluorescent +lifeless +spence +##ulla +crewe +shepard +pak +revelations +##م +jolly +gibbons +paw +##dro +##quel +freeing +##test +shack +fries +palatine +##51 +##hiko +accompaniment +cruising +recycled +##aver +erwin +sorting +synthesizers +dyke +realities +sg +strides +enslaved +wetland +##ghan +competence +gunpowder +grassy +maroon +reactors +objection +##oms +carlson +gearbox +macintosh +radios +shelton +##sho +clergyman +prakash +254 +mongols +trophies +oricon +228 +stimuli +twenty20 +cantonese +cortes +mirrored +##saurus +bhp +cristina +melancholy +##lating +enjoyable +nuevo +##wny +downfall +schumacher +##ind +banging +lausanne +rumbled +paramilitary +reflex +ax +amplitude +migratory +##gall +##ups +midi +barnard +lastly +sherry +##hp +##nall +keystone +##kra +carleton +slippery +##53 +coloring +foe +socket +otter +##rgos +mats +##tose +consultants +bafta +bison +topping +##km +490 +primal +abandonment +transplant +atoll +hideous +mort +pained +reproduced +tae +howling +##turn +unlawful +billionaire +hotter +poised +lansing 
+##chang +dinamo +retro +messing +nfc +domesday +##mina +blitz +timed +##athing +##kley +ascending +gesturing +##izations +signaled +tis +chinatown +mermaid +savanna +jameson +##aint +catalina +##pet +##hers +cochrane +cy +chatting +##kus +alerted +computation +mused +noelle +majestic +mohawk +campo +octagonal +##sant +##hend +241 +aspiring +##mart +comprehend +iona +paralyzed +shimmering +swindon +rhone +##eley +reputed +configurations +pitchfork +agitation +francais +gillian +lipstick +##ilo +outsiders +pontifical +resisting +bitterness +sewer +rockies +##edd +##ucher +misleading +1756 +exiting +galloway +##nging +risked +##heart +246 +commemoration +schultz +##rka +integrating +##rsa +poses +shrieked +##weiler +guineas +gladys +jerking +owls +goldsmith +nightly +penetrating +##unced +lia +##33 +ignited +betsy +##aring +##thorpe +follower +vigorously +##rave +coded +kiran +knit +zoology +tbilisi +##28 +##bered +repository +govt +deciduous +dino +growling +##bba +enhancement +unleashed +chanting +pussy +biochemistry +##eric +kettle +repression +toxicity +nrhp +##arth +##kko +##bush +ernesto +commended +outspoken +242 +mca +parchment +sms +kristen +##aton +bisexual +raked +glamour +navajo +a2 +conditioned +showcased +##hma +spacious +youthful +##esa +usl +appliances +junta +brest +layne +conglomerate +enchanted +chao +loosened +picasso +circulating +inspect +montevideo +##centric +##kti +piazza +spurred +##aith +bari +freedoms +poultry +stamford +lieu +##ect +indigo +sarcastic +bahia +stump +attach +dvds +frankenstein +lille +approx +scriptures +pollen +##script +nmi +overseen +##ivism +tides +proponent +newmarket +inherit +milling +##erland +centralized +##rou +distributors +credentials +drawers +abbreviation +##lco +##xon +downing +uncomfortably +ripe +##oes +erase +franchises +##ever +populace +##bery +##khar +decomposition +pleas +##tet +daryl +sabah +##stle +##wide +fearless +genie +lesions +annette +##ogist +oboe +appendix +nair +dripped +petitioned +maclean +mosquito +parrot +rpg +hampered +1648 +operatic +reservoirs +##tham +irrelevant +jolt +summarized +##fp +medallion +##taff +##− +clawed +harlow +narrower +goddard +marcia +bodied +fremont +suarez +altering +tempest +mussolini +porn +##isms +sweetly +oversees +walkers +solitude +grimly +shrines +hk +ich +supervisors +hostess +dietrich +legitimacy +brushes +expressive +##yp +dissipated +##rse +localized +systemic +##nikov +gettysburg +##js +##uaries +dialogues +muttering +251 +housekeeper +sicilian +discouraged +##frey +beamed +kaladin +halftime +kidnap +##amo +##llet +1754 +synonymous +depleted +instituto +insulin +reprised +##opsis +clashed +##ctric +interrupting +radcliffe +insisting +medici +1715 +ejected +playfully +turbulent +##47 +starvation +##rini +shipment +rebellious +petersen +verification +merits +##rified +cakes +##charged +1757 +milford +shortages +spying +fidelity +##aker +emitted +storylines +harvested +seismic +##iform +cheung +kilda +theoretically +barbie +lynx +##rgy +##tius +goblin +mata +poisonous +##nburg +reactive +residues +obedience +##евич +conjecture +##rac +401 +hating +sixties +kicker +moaning +motown +##bha +emancipation +neoclassical +##hering +consoles +ebert +professorship +##tures +sustaining +assaults +obeyed +affluent +incurred +tornadoes +##eber +##zow +emphasizing +highlanders +cheated +helmets +##ctus +internship +terence +bony +executions +legislators +berries +peninsular +tinged +##aco +1689 +amplifier +corvette +ribbons +lavish +pennant +##lander +worthless +##chfield +##forms +mariano 
+pyrenees +expenditures +##icides +chesterfield +mandir +tailor +39th +sergey +nestled +willed +aristocracy +devotees +goodnight +raaf +rumored +weaponry +remy +appropriations +harcourt +burr +riaa +##lence +limitation +unnoticed +guo +soaking +swamps +##tica +collapsing +tatiana +descriptive +brigham +psalm +##chment +maddox +##lization +patti +caliph +##aja +akron +injuring +serra +##ganj +basins +##sari +astonished +launcher +##church +hilary +wilkins +sewing +##sf +stinging +##fia +##ncia +underwood +startup +##ition +compilations +vibrations +embankment +jurist +##nity +bard +juventus +groundwater +kern +palaces +helium +boca +cramped +marissa +soto +##worm +jae +princely +##ggy +faso +bazaar +warmly +##voking +229 +pairing +##lite +##grate +##nets +wien +freaked +ulysses +rebirth +##alia +##rent +mummy +guzman +jimenez +stilled +##nitz +trajectory +tha +woken +archival +professions +##pts +##pta +hilly +shadowy +shrink +##bolt +norwood +glued +migrate +stereotypes +devoid +##pheus +625 +evacuate +horrors +infancy +gotham +knowles +optic +downloaded +sachs +kingsley +parramatta +darryl +mor +##onale +shady +commence +confesses +kan +##meter +##placed +marlborough +roundabout +regents +frigates +io +##imating +gothenburg +revoked +carvings +clockwise +convertible +intruder +##sche +banged +##ogo +vicky +bourgeois +##mony +dupont +footing +##gum +pd +##real +buckle +yun +penthouse +sane +720 +serviced +stakeholders +neumann +bb +##eers +comb +##gam +catchment +pinning +rallies +typing +##elles +forefront +freiburg +sweetie +giacomo +widowed +goodwill +worshipped +aspirations +midday +##vat +fishery +##trick +bournemouth +turk +243 +hearth +ethanol +guadalajara +murmurs +sl +##uge +afforded +scripted +##hta +wah +##jn +coroner +translucent +252 +memorials +puck +progresses +clumsy +##race +315 +candace +recounted +##27 +##slin +##uve +filtering +##mac +howl +strata +heron +leveled +##ays +dubious +##oja +##т +##wheel +citations +exhibiting +##laya +##mics +##pods +turkic +##lberg +injunction +##ennial +##mit +antibodies +##44 +organise +##rigues +cardiovascular +cushion +inverness +##zquez +dia +cocoa +sibling +##tman +##roid +expanse +feasible +tunisian +algiers +##relli +rus +bloomberg +dso +westphalia +bro +tacoma +281 +downloads +##ours +konrad +duran +##hdi +continuum +jett +compares +legislator +secession +##nable +##gues +##zuka +translating +reacher +##gley +##ła +aleppo +##agi +tc +orchards +trapping +linguist +versatile +drumming +postage +calhoun +superiors +##mx +barefoot +leary +##cis +ignacio +alfa +kaplan +##rogen +bratislava +mori +##vot +disturb +haas +313 +cartridges +gilmore +radiated +salford +tunic +hades +##ulsive +archeological +delilah +magistrates +auditioned +brewster +charters +empowerment +blogs +cappella +dynasties +iroquois +whipping +##krishna +raceway +truths +myra +weaken +judah +mcgregor +##horse +mic +refueling +37th +burnley +bosses +markus +premio +query +##gga +dunbar +##economic +darkest +lyndon +sealing +commendation +reappeared +##mun +addicted +ezio +slaughtered +satisfactory +shuffle +##eves +##thic +##uj +fortification +warrington +##otto +resurrected +fargo +mane +##utable +##lei +##space +foreword +ox +##aris +##vern +abrams +hua +##mento +sakura +##alo +uv +sentimental +##skaya +midfield +##eses +sturdy +scrolls +macleod +##kyu +entropy +##lance +mitochondrial +cicero +excelled +thinner +convoys +perceive +##oslav +##urable +systematically +grind +burkina +287 +##tagram +ops +##aman +guantanamo +##cloth +##tite +forcefully +wavy +##jou 
+pointless +##linger +##tze +layton +portico +superficial +clerical +outlaws +##hism +burials +muir +##inn +creditors +hauling +rattle +##leg +calais +monde +archers +reclaimed +dwell +wexford +hellenic +falsely +remorse +##tek +dough +furnishings +##uttered +gabon +neurological +novice +##igraphy +contemplated +pulpit +nightstand +saratoga +##istan +documenting +pulsing +taluk +##firmed +busted +marital +##rien +disagreements +wasps +##yes +hodge +mcdonnell +mimic +fran +pendant +dhabi +musa +##nington +congratulations +argent +darrell +concussion +losers +regrets +thessaloniki +reversal +donaldson +hardwood +thence +achilles +ritter +##eran +demonic +jurgen +prophets +goethe +eki +classmate +buff +##cking +yank +irrational +##inging +perished +seductive +qur +sourced +##crat +##typic +mustard +ravine +barre +horizontally +characterization +phylogenetic +boise +##dit +##runner +##tower +brutally +intercourse +seduce +##bbing +fay +ferris +ogden +amar +nik +unarmed +##inator +evaluating +kyrgyzstan +sweetness +##lford +##oki +mccormick +meiji +notoriety +stimulate +disrupt +figuring +instructional +mcgrath +##zoo +groundbreaking +##lto +flinch +khorasan +agrarian +bengals +mixer +radiating +##sov +ingram +pitchers +nad +tariff +##cript +tata +##codes +##emi +##ungen +appellate +lehigh +##bled +##giri +brawl +duct +texans +##ciation +##ropolis +skipper +speculative +vomit +doctrines +stresses +253 +davy +graders +whitehead +jozef +timely +cumulative +haryana +paints +appropriately +boon +cactus +##ales +##pid +dow +legions +##pit +perceptions +1730 +picturesque +##yse +periphery +rune +wr +##aha +celtics +sentencing +whoa +##erin +confirms +variance +425 +moines +mathews +spade +rave +m1 +fronted +fx +blending +alleging +reared +##gl +237 +##paper +grassroots +eroded +##free +##physical +directs +ordeal +##sław +accelerate +hacker +rooftop +##inia +lev +buys +cebu +devote +##lce +specialising +##ulsion +choreographed +repetition +warehouses +##ryl +paisley +tuscany +analogy +sorcerer +hash +huts +shards +descends +exclude +nix +chaplin +gaga +ito +vane +##drich +causeway +misconduct +limo +orchestrated +glands +jana +##kot +u2 +##mple +##sons +branching +contrasts +scoop +longed +##virus +chattanooga +##75 +syrup +cornerstone +##tized +##mind +##iaceae +careless +precedence +frescoes +##uet +chilled +consult +modelled +snatch +peat +##thermal +caucasian +humane +relaxation +spins +temperance +##lbert +occupations +lambda +hybrids +moons +mp3 +##oese +247 +rolf +societal +yerevan +ness +##ssler +befriended +mechanized +nominate +trough +boasted +cues +seater +##hom +bends +##tangle +conductors +emptiness +##lmer +eurasian +adriatic +tian +##cie +anxiously +lark +propellers +chichester +jock +ev +2a +##holding +credible +recounts +tori +loyalist +abduction +##hoot +##redo +nepali +##mite +ventral +tempting +##ango +##crats +steered +##wice +javelin +dipping +laborers +prentice +looming +titanium +##ː +badges +emir +tensor +##ntation +egyptians +rash +denies +hawthorne +lombard +showers +wehrmacht +dietary +trojan +##reus +welles +executing +horseshoe +lifeboat +##lak +elsa +infirmary +nearing +roberta +boyer +mutter +trillion +joanne +##fine +##oked +sinks +vortex +uruguayan +clasp +sirius +##block +accelerator +prohibit +sunken +byu +chronological +diplomats +ochreous +510 +symmetrical +1644 +maia +##tology +salts +reigns +atrocities +##ия +hess +bared +issn +##vyn +cater +saturated +##cycle +##isse +sable +voyager +dyer +yusuf +##inge +fountains +wolff +##39 +##nni +engraving +rollins 
+atheist +ominous +##ault +herr +chariot +martina +strung +##fell +##farlane +horrific +sahib +gazes +saetan +erased +ptolemy +##olic +flushing +lauderdale +analytic +##ices +530 +navarro +beak +gorilla +herrera +broom +guadalupe +raiding +sykes +311 +bsc +deliveries +1720 +invasions +carmichael +tajikistan +thematic +ecumenical +sentiments +onstage +##rians +##brand +##sume +catastrophic +flanks +molten +##arns +waller +aimee +terminating +##icing +alternately +##oche +nehru +printers +outraged +##eving +empires +template +banners +repetitive +za +##oise +vegetarian +##tell +guiana +opt +cavendish +lucknow +synthesized +##hani +##mada +finalized +##ctable +fictitious +mayoral +unreliable +##enham +embracing +peppers +rbis +##chio +##neo +inhibition +slashed +togo +orderly +embroidered +safari +salty +236 +barron +benito +totaled +##dak +pubs +simulated +caden +devin +tolkien +momma +welding +sesame +##ept +gottingen +hardness +630 +shaman +temeraire +620 +adequately +pediatric +##kit +ck +assertion +radicals +composure +cadence +seafood +beaufort +lazarus +mani +warily +cunning +kurdistan +249 +cantata +##kir +ares +##41 +##clusive +nape +townland +geared +insulted +flutter +boating +violate +draper +dumping +malmo +##hh +##romatic +firearm +alta +bono +obscured +##clave +exceeds +panorama +unbelievable +##train +preschool +##essed +disconnected +installing +rescuing +secretaries +accessibility +##castle +##drive +##ifice +##film +bouts +slug +waterway +mindanao +##buro +##ratic +halves +##ل +calming +liter +maternity +adorable +bragg +electrification +mcc +##dote +roxy +schizophrenia +##body +munoz +kaye +whaling +239 +mil +tingling +tolerant +##ago +unconventional +volcanoes +##finder +deportivo +##llie +robson +kaufman +neuroscience +wai +deportation +masovian +scraping +converse +##bh +hacking +bulge +##oun +administratively +yao +580 +amp +mammoth +booster +claremont +hooper +nomenclature +pursuits +mclaughlin +melinda +##sul +catfish +barclay +substrates +taxa +zee +originals +kimberly +packets +padma +##ality +borrowing +ostensibly +solvent +##bri +##genesis +##mist +lukas +shreveport +veracruz +##ь +##lou +##wives +cheney +tt +anatolia +hobbs +##zyn +cyclic +radiant +alistair +greenish +siena +dat +independents +##bation +conform +pieter +hyper +applicant +bradshaw +spores +telangana +vinci +inexpensive +nuclei +322 +jang +nme +soho +spd +##ign +cradled +receptionist +pow +##43 +##rika +fascism +##ifer +experimenting +##ading +##iec +##region +345 +jocelyn +maris +stair +nocturnal +toro +constabulary +elgin +##kker +msc +##giving +##schen +##rase +doherty +doping +sarcastically +batter +maneuvers +##cano +##apple +##gai +##git +intrinsic +##nst +##stor +1753 +showtime +cafes +gasps +lviv +ushered +##thed +fours +restart +astonishment +transmitting +flyer +shrugs +##sau +intriguing +cones +dictated +mushrooms +medial +##kovsky +##elman +escorting +gaped +##26 +godfather +##door +##sell +djs +recaptured +timetable +vila +1710 +3a +aerodrome +mortals +scientology +##orne +angelina +mag +convection +unpaid +insertion +intermittent +lego +##nated +endeavor +kota +pereira +##lz +304 +bwv +glamorgan +insults +agatha +fey +##cend +fleetwood +mahogany +protruding +steamship +zeta +##arty +mcguire +suspense +##sphere +advising +urges +##wala +hurriedly +meteor +gilded +inline +arroyo +stalker +##oge +excitedly +revered +##cure +earle +introductory +##break +##ilde +mutants +puff +pulses +reinforcement +##haling +curses +lizards +stalk +correlated +##fixed +fallout +macquarie +##unas 
+bearded +denton +heaving +802 +##ocation +winery +assign +dortmund +##lkirk +everest +invariant +charismatic +susie +##elling +bled +lesley +telegram +sumner +bk +##ogen +##к +wilcox +needy +colbert +duval +##iferous +##mbled +allotted +attends +imperative +##hita +replacements +hawker +##inda +insurgency +##zee +##eke +casts +##yla +680 +ives +transitioned +##pack +##powering +authoritative +baylor +flex +cringed +plaintiffs +woodrow +##skie +drastic +ape +aroma +unfolded +commotion +nt +preoccupied +theta +routines +lasers +privatization +wand +domino +ek +clenching +nsa +strategically +showered +bile +handkerchief +pere +storing +christophe +insulting +316 +nakamura +romani +asiatic +magdalena +palma +cruises +stripping +405 +konstantin +soaring +##berman +colloquially +forerunner +havilland +incarcerated +parasites +sincerity +##utus +disks +plank +saigon +##ining +corbin +homo +ornaments +powerhouse +##tlement +chong +fastened +feasibility +idf +morphological +usable +##nish +##zuki +aqueduct +jaguars +keepers +##flies +aleksandr +faust +assigns +ewing +bacterium +hurled +tricky +hungarians +integers +wallis +321 +yamaha +##isha +hushed +oblivion +aviator +evangelist +friars +##eller +monograph +ode +##nary +airplanes +labourers +charms +##nee +1661 +hagen +tnt +rudder +fiesta +transcript +dorothea +ska +inhibitor +maccabi +retorted +raining +encompassed +clauses +menacing +1642 +lineman +##gist +vamps +##ape +##dick +gloom +##rera +dealings +easing +seekers +##nut +##pment +helens +unmanned +##anu +##isson +basics +##amy +##ckman +adjustments +1688 +brutality +horne +##zell +sui +##55 +##mable +aggregator +##thal +rhino +##drick +##vira +counters +zoom +##01 +##rting +mn +montenegrin +packard +##unciation +##♭ +##kki +reclaim +scholastic +thugs +pulsed +##icia +syriac +quan +saddam +banda +kobe +blaming +buddies +dissent +##lusion +##usia +corbett +jaya +delle +erratic +lexie +##hesis +435 +amiga +hermes +##pressing +##leen +chapels +gospels +jamal +##uating +compute +revolving +warp +##sso +##thes +armory +##eras +##gol +antrim +loki +##kow +##asian +##good +##zano +braid +handwriting +subdistrict +funky +pantheon +##iculate +concurrency +estimation +improper +juliana +##his +newcomers +johnstone +staten +communicated +##oco +##alle +sausage +stormy +##stered +##tters +superfamily +##grade +acidic +collateral +tabloid +##oped +##rza +bladder +austen +##ellant +mcgraw +##hay +hannibal +mein +aquino +lucifer +wo +badger +boar +cher +christensen +greenberg +interruption +##kken +jem +244 +mocked +bottoms +cambridgeshire +##lide +sprawling +##bbly +eastwood +ghent +synth +##buck +advisers +##bah +nominally +hapoel +qu +daggers +estranged +fabricated +towels +vinnie +wcw +misunderstanding +anglia +nothin +unmistakable +##dust +##lova +chilly +marquette +truss +##edge +##erine +reece +##lty +##chemist +##connected +272 +308 +41st +bash +raion +waterfalls +##ump +##main +labyrinth +queue +theorist +##istle +bharatiya +flexed +soundtracks +rooney +leftist +patrolling +wharton +plainly +alleviate +eastman +schuster +topographic +engages +immensely +unbearable +fairchild +1620 +dona +lurking +parisian +oliveira +ia +indictment +hahn +bangladeshi +##aster +vivo +##uming +##ential +antonia +expects +indoors +kildare +harlan +##logue +##ogenic +##sities +forgiven +##wat +childish +tavi +##mide +##orra +plausible +grimm +successively +scooted +##bola +##dget +##rith +spartans +emery +flatly +azure +epilogue +##wark +flourish +##iny +##tracted +##overs +##oshi +bestseller +distressed +receipt 
+spitting +hermit +topological +##cot +drilled +subunit +francs +##layer +eel +##fk +##itas +octopus +footprint +petitions +ufo +##say +##foil +interfering +leaking +palo +##metry +thistle +valiant +##pic +narayan +mcpherson +##fast +gonzales +##ym +##enne +dustin +novgorod +solos +##zman +doin +##raph +##patient +##meyer +soluble +ashland +cuffs +carole +pendleton +whistling +vassal +##river +deviation +revisited +constituents +rallied +rotate +loomed +##eil +##nting +amateurs +augsburg +auschwitz +crowns +skeletons +##cona +bonnet +257 +dummy +globalization +simeon +sleeper +mandal +differentiated +##crow +##mare +milne +bundled +exasperated +talmud +owes +segregated +##feng +##uary +dentist +piracy +props +##rang +devlin +##torium +malicious +paws +##laid +dependency +##ergy +##fers +##enna +258 +pistons +rourke +jed +grammatical +tres +maha +wig +512 +ghostly +jayne +##achal +##creen +##ilis +##lins +##rence +designate +##with +arrogance +cambodian +clones +showdown +throttle +twain +##ception +lobes +metz +nagoya +335 +braking +##furt +385 +roaming +##minster +amin +crippled +##37 +##llary +indifferent +hoffmann +idols +intimidating +1751 +261 +influenza +memo +onions +1748 +bandage +consciously +##landa +##rage +clandestine +observes +swiped +tangle +##ener +##jected +##trum +##bill +##lta +hugs +congresses +josiah +spirited +##dek +humanist +managerial +filmmaking +inmate +rhymes +debuting +grimsby +ur +##laze +duplicate +vigor +##tf +republished +bolshevik +refurbishment +antibiotics +martini +methane +newscasts +royale +horizons +levant +iain +visas +##ischen +paler +##around +manifestation +snuck +alf +chop +futile +pedestal +rehab +##kat +bmg +kerman +res +fairbanks +jarrett +abstraction +saharan +##zek +1746 +procedural +clearer +kincaid +sash +luciano +##ffey +crunch +helmut +##vara +revolutionaries +##tute +creamy +leach +##mmon +1747 +permitting +nes +plight +wendell +##lese +contra +ts +clancy +ipa +mach +staples +autopsy +disturbances +nueva +karin +pontiac +##uding +proxy +venerable +haunt +leto +bergman +expands +##helm +wal +##pipe +canning +celine +cords +obesity +##enary +intrusion +planner +##phate +reasoned +sequencing +307 +harrow +##chon +##dora +marred +mcintyre +repay +tarzan +darting +248 +harrisburg +margarita +repulsed +##hur +##lding +belinda +hamburger +novo +compliant +runways +bingham +registrar +skyscraper +ic +cuthbert +improvisation +livelihood +##corp +##elial +admiring +##dened +sporadic +believer +casablanca +popcorn +##29 +asha +shovel +##bek +##dice +coiled +tangible +##dez +casper +elsie +resin +tenderness +rectory +##ivision +avail +sonar +##mori +boutique +##dier +guerre +bathed +upbringing +vaulted +sandals +blessings +##naut +##utnant +1680 +306 +foxes +pia +corrosion +hesitantly +confederates +crystalline +footprints +shapiro +tirana +valentin +drones +45th +microscope +shipments +texted +inquisition +wry +guernsey +unauthorized +resigning +760 +ripple +schubert +stu +reassure +felony +##ardo +brittle +koreans +##havan +##ives +dun +implicit +tyres +##aldi +##lth +magnolia +##ehan +##puri +##poulos +aggressively +fei +gr +familiarity +##poo +indicative +##trust +fundamentally +jimmie +overrun +395 +anchors +moans +##opus +britannia +armagh +##ggle +purposely +seizing +##vao +bewildered +mundane +avoidance +cosmopolitan +geometridae +quartermaster +caf +415 +chatter +engulfed +gleam +purge +##icate +juliette +jurisprudence +guerra +revisions +##bn +casimir +brew +##jm +1749 +clapton +cloudy +conde +hermitage +278 +simulations +torches 
+vincenzo +matteo +##rill +hidalgo +booming +westbound +accomplishment +tentacles +unaffected +##sius +annabelle +flopped +sloping +##litz +dreamer +interceptor +vu +##loh +consecration +copying +messaging +breaker +climates +hospitalized +1752 +torino +afternoons +winfield +witnessing +##teacher +breakers +choirs +sawmill +coldly +##ege +sipping +haste +uninhabited +conical +bibliography +pamphlets +severn +edict +##oca +deux +illnesses +grips +##pl +rehearsals +sis +thinkers +tame +##keepers +1690 +acacia +reformer +##osed +##rys +shuffling +##iring +##shima +eastbound +ionic +rhea +flees +littered +##oum +rocker +vomiting +groaning +champ +overwhelmingly +civilizations +paces +sloop +adoptive +##tish +skaters +##vres +aiding +mango +##joy +nikola +shriek +##ignon +pharmaceuticals +##mg +tuna +calvert +gustavo +stocked +yearbook +##urai +##mana +computed +subsp +riff +hanoi +kelvin +hamid +moors +pastures +summons +jihad +nectar +##ctors +bayou +untitled +pleasing +vastly +republics +intellect +##η +##ulio +##tou +crumbling +stylistic +sb +##ی +consolation +frequented +h₂o +walden +widows +##iens +404 +##ignment +chunks +improves +288 +grit +recited +##dev +snarl +sociological +##arte +##gul +inquired +##held +bruise +clube +consultancy +homogeneous +hornets +multiplication +pasta +prick +savior +##grin +##kou +##phile +yoon +##gara +grimes +vanishing +cheering +reacting +bn +distillery +##quisite +##vity +coe +dockyard +massif +##jord +escorts +voss +##valent +byte +chopped +hawke +illusions +workings +floats +##koto +##vac +kv +annapolis +madden +##onus +alvaro +noctuidae +##cum +##scopic +avenge +steamboat +forte +illustrates +erika +##trip +570 +dew +nationalities +bran +manifested +thirsty +diversified +muscled +reborn +##standing +arson +##lessness +##dran +##logram +##boys +##kushima +##vious +willoughby +##phobia +286 +alsace +dashboard +yuki +##chai +granville +myspace +publicized +tricked +##gang +adjective +##ater +relic +reorganisation +enthusiastically +indications +saxe +##lassified +consolidate +iec +padua +helplessly +ramps +renaming +regulars +pedestrians +accents +convicts +inaccurate +lowers +mana +##pati +barrie +bjp +outta +someplace +berwick +flanking +invoked +marrow +sparsely +excerpts +clothed +rei +##ginal +wept +##straße +##vish +alexa +excel +##ptive +membranes +aquitaine +creeks +cutler +sheppard +implementations +ns +##dur +fragrance +budge +concordia +magnesium +marcelo +##antes +gladly +vibrating +##rral +##ggles +montrose +##omba +lew +seamus +1630 +cocky +##ament +##uen +bjorn +##rrick +fielder +fluttering +##lase +methyl +kimberley +mcdowell +reductions +barbed +##jic +##tonic +aeronautical +condensed +distracting +##promising +huffed +##cala +##sle +claudius +invincible +missy +pious +balthazar +ci +##lang +butte +combo +orson +##dication +myriad +1707 +silenced +##fed +##rh +coco +netball +yourselves +##oza +clarify +heller +peg +durban +etudes +offender +roast +blackmail +curvature +##woods +vile +309 +illicit +suriname +##linson +overture +1685 +bubbling +gymnast +tucking +##mming +##ouin +maldives +##bala +gurney +##dda +##eased +##oides +backside +pinto +jars +racehorse +tending +##rdial +baronetcy +wiener +duly +##rke +barbarian +cupping +flawed +##thesis +bertha +pleistocene +puddle +swearing +##nob +##tically +fleeting +prostate +amulet +educating +##mined +##iti +##tler +75th +jens +respondents +analytics +cavaliers +papacy +raju +##iente +##ulum +##tip +funnel +271 +disneyland +##lley +sociologist +##iam +2500 +faulkner +louvre +menon +##dson 
+276 +##ower +afterlife +mannheim +peptide +referees +comedians +meaningless +##anger +##laise +fabrics +hurley +renal +sleeps +##bour +##icle +breakout +kristin +roadside +animator +clover +disdain +unsafe +redesign +##urity +firth +barnsley +portage +reset +narrows +268 +commandos +expansive +speechless +tubular +##lux +essendon +eyelashes +smashwords +##yad +##bang +##claim +craved +sprinted +chet +somme +astor +wrocław +orton +266 +bane +##erving +##uing +mischief +##amps +##sund +scaling +terre +##xious +impairment +offenses +undermine +moi +soy +contiguous +arcadia +inuit +seam +##tops +macbeth +rebelled +##icative +##iot +590 +elaborated +frs +uniformed +##dberg +259 +powerless +priscilla +stimulated +980 +qc +arboretum +frustrating +trieste +bullock +##nified +enriched +glistening +intern +##adia +locus +nouvelle +ollie +ike +lash +starboard +ee +tapestry +headlined +hove +rigged +##vite +pollock +##yme +thrive +clustered +cas +roi +gleamed +olympiad +##lino +pressured +regimes +##hosis +##lick +ripley +##ophone +kickoff +gallon +rockwell +##arable +crusader +glue +revolutions +scrambling +1714 +grover +##jure +englishman +aztec +263 +contemplating +coven +ipad +preach +triumphant +tufts +##esian +rotational +##phus +328 +falkland +##brates +strewn +clarissa +rejoin +environmentally +glint +banded +drenched +moat +albanians +johor +rr +maestro +malley +nouveau +shaded +taxonomy +v6 +adhere +bunk +airfields +##ritan +1741 +encompass +remington +tran +##erative +amelie +mazda +friar +morals +passions +##zai +breadth +vis +##hae +argus +burnham +caressing +insider +rudd +##imov +##mini +##rso +italianate +murderous +textual +wainwright +armada +bam +weave +timer +##taken +##nh +fra +##crest +ardent +salazar +taps +tunis +##ntino +allegro +gland +philanthropic +##chester +implication +##optera +esq +judas +noticeably +wynn +##dara +inched +indexed +crises +villiers +bandit +royalties +patterned +cupboard +interspersed +accessory +isla +kendrick +entourage +stitches +##esthesia +headwaters +##ior +interlude +distraught +draught +1727 +##basket +biased +sy +transient +triad +subgenus +adapting +kidd +shortstop +##umatic +dimly +spiked +mcleod +reprint +nellie +pretoria +windmill +##cek +singled +##mps +273 +reunite +##orous +747 +bankers +outlying +##omp +##ports +##tream +apologies +cosmetics +patsy +##deh +##ocks +##yson +bender +nantes +serene +##nad +lucha +mmm +323 +##cius +##gli +cmll +coinage +nestor +juarez +##rook +smeared +sprayed +twitching +sterile +irina +embodied +juveniles +enveloped +miscellaneous +cancers +dq +gulped +luisa +crested +swat +donegal +ref +##anov +##acker +hearst +mercantile +##lika +doorbell +ua +vicki +##alla +##som +bilbao +psychologists +stryker +sw +horsemen +turkmenistan +wits +##national +anson +mathew +screenings +##umb +rihanna +##agne +##nessy +aisles +##iani +##osphere +hines +kenton +saskatoon +tasha +truncated +##champ +##itan +mildred +advises +fredrik +interpreting +inhibitors +##athi +spectroscopy +##hab +##kong +karim +panda +##oia +##nail +##vc +conqueror +kgb +leukemia +##dity +arrivals +cheered +pisa +phosphorus +shielded +##riated +mammal +unitarian +urgently +chopin +sanitary +##mission +spicy +drugged +hinges +##tort +tipping +trier +impoverished +westchester +##caster +267 +epoch +nonstop +##gman +##khov +aromatic +centrally +cerro +##tively +##vio +billions +modulation +sedimentary +283 +facilitating +outrageous +goldstein +##eak +##kt +ld +maitland +penultimate +pollard +##dance +fleets +spaceship +vertebrae +##nig +alcoholism +als 
+recital +##bham +##ference +##omics +m2 +##bm +trois +##tropical +##в +commemorates +##meric +marge +##raction +1643 +670 +cosmetic +ravaged +##ige +catastrophe +eng +##shida +albrecht +arterial +bellamy +decor +harmon +##rde +bulbs +synchronized +vito +easiest +shetland +shielding +wnba +##glers +##ssar +##riam +brianna +cumbria +##aceous +##rard +cores +thayer +##nsk +brood +hilltop +luminous +carts +keynote +larkin +logos +##cta +##ا +##mund +##quay +lilith +tinted +277 +wrestle +mobilization +##uses +sequential +siam +bloomfield +takahashi +274 +##ieving +presenters +ringo +blazed +witty +##oven +##ignant +devastation +haydn +harmed +newt +therese +##peed +gershwin +molina +rabbis +sudanese +001 +innate +restarted +##sack +##fus +slices +wb +##shah +enroll +hypothetical +hysterical +1743 +fabio +indefinite +warped +##hg +exchanging +525 +unsuitable +##sboro +gallo +1603 +bret +cobalt +homemade +##hunter +mx +operatives +##dhar +terraces +durable +latch +pens +whorls +##ctuated +##eaux +billing +ligament +succumbed +##gly +regulators +spawn +##brick +##stead +filmfare +rochelle +##nzo +1725 +circumstance +saber +supplements +##nsky +##tson +crowe +wellesley +carrot +##9th +##movable +primate +drury +sincerely +topical +##mad +##rao +callahan +kyiv +smarter +tits +undo +##yeh +announcements +anthologies +barrio +nebula +##islaus +##shaft +##tyn +bodyguards +2021 +assassinate +barns +emmett +scully +##mah +##yd +##eland +##tino +##itarian +demoted +gorman +lashed +prized +adventist +writ +##gui +alla +invertebrates +##ausen +1641 +amman +1742 +align +healy +redistribution +##gf +##rize +insulation +##drop +adherents +hezbollah +vitro +ferns +yanking +269 +php +registering +uppsala +cheerleading +confines +mischievous +tully +##ross +49th +docked +roam +stipulated +pumpkin +##bry +prompt +##ezer +blindly +shuddering +craftsmen +frail +scented +katharine +scramble +shaggy +sponge +helix +zaragoza +279 +##52 +43rd +backlash +fontaine +seizures +posse +cowan +nonfiction +telenovela +wwii +hammered +undone +##gpur +encircled +irs +##ivation +artefacts +oneself +searing +smallpox +##belle +##osaurus +shandong +breached +upland +blushing +rankin +infinitely +psyche +tolerated +docking +evicted +##col +unmarked +##lving +gnome +lettering +litres +musique +##oint +benevolent +##jal +blackened +##anna +mccall +racers +tingle +##ocene +##orestation +introductions +radically +292 +##hiff +##باد +1610 +1739 +munchen +plead +##nka +condo +scissors +##sight +##tens +apprehension +##cey +##yin +hallmark +watering +formulas +sequels +##llas +aggravated +bae +commencing +##building +enfield +prohibits +marne +vedic +civilized +euclidean +jagger +beforehand +blasts +dumont +##arney +##nem +740 +conversions +hierarchical +rios +simulator +##dya +##lellan +hedges +oleg +thrusts +shadowed +darby +maximize +1744 +gregorian +##nded +##routed +sham +unspecified +##hog +emory +factual +##smo +##tp +fooled +##rger +ortega +wellness +marlon +##oton +##urance +casket +keating +ley +enclave +##ayan +char +influencing +jia +##chenko +412 +ammonia +erebidae +incompatible +violins +cornered +##arat +grooves +astronauts +columbian +rampant +fabrication +kyushu +mahmud +vanish +##dern +mesopotamia +##lete +ict +##rgen +caspian +kenji +pitted +##vered +999 +grimace +roanoke +tchaikovsky +twinned +##analysis +##awan +xinjiang +arias +clemson +kazakh +sizable +1662 +##khand +##vard +plunge +tatum +vittorio +##nden +cholera +##dana +##oper +bracing +indifference +projectile +superliga +##chee +realises +upgrading +299 +porte 
+retribution +##vies +nk +stil +##resses +ama +bureaucracy +blackberry +bosch +testosterone +collapses +greer +##pathic +ioc +fifties +malls +##erved +bao +baskets +adolescents +siegfried +##osity +##tosis +mantra +detecting +existent +fledgling +##cchi +dissatisfied +gan +telecommunication +mingled +sobbed +6000 +controversies +outdated +taxis +##raus +fright +slams +##lham +##fect +##tten +detectors +fetal +tanned +##uw +fray +goth +olympian +skipping +mandates +scratches +sheng +unspoken +hyundai +tracey +hotspur +restrictive +##buch +americana +mundo +##bari +burroughs +diva +vulcan +##6th +distinctions +thumping +##ngen +mikey +sheds +fide +rescues +springsteen +vested +valuation +##ece +##ely +pinnacle +rake +sylvie +##edo +almond +quivering +##irus +alteration +faltered +##wad +51st +hydra +ticked +##kato +recommends +##dicated +antigua +arjun +stagecoach +wilfred +trickle +pronouns +##pon +aryan +nighttime +##anian +gall +pea +stitch +##hei +leung +milos +##dini +eritrea +nexus +starved +snowfall +kant +parasitic +cot +discus +hana +strikers +appleton +kitchens +##erina +##partisan +##itha +##vius +disclose +metis +##channel +1701 +tesla +##vera +fitch +1735 +blooded +##tila +decimal +##tang +##bai +cyclones +eun +bottled +peas +pensacola +basha +bolivian +crabs +boil +lanterns +partridge +roofed +1645 +necks +##phila +opined +patting +##kla +##lland +chuckles +volta +whereupon +##nche +devout +euroleague +suicidal +##dee +inherently +involuntary +knitting +nasser +##hide +puppets +colourful +courageous +southend +stills +miraculous +hodgson +richer +rochdale +ethernet +greta +uniting +prism +umm +##haya +##itical +##utation +deterioration +pointe +prowess +##ropriation +lids +scranton +billings +subcontinent +##koff +##scope +brute +kellogg +psalms +degraded +##vez +stanisław +##ructured +ferreira +pun +astonishing +gunnar +##yat +arya +prc +gottfried +##tight +excursion +##ographer +dina +##quil +##nare +huffington +illustrious +wilbur +gundam +verandah +##zard +naacp +##odle +constructive +fjord +kade +##naud +generosity +thrilling +baseline +cayman +frankish +plastics +accommodations +zoological +##fting +cedric +qb +motorized +##dome +##otted +squealed +tackled +canucks +budgets +situ +asthma +dail +gabled +grasslands +whimpered +writhing +judgments +##65 +minnie +pv +##carbon +bananas +grille +domes +monique +odin +maguire +markham +tierney +##estra +##chua +libel +poke +speedy +atrium +laval +notwithstanding +##edly +fai +kala +##sur +robb +##sma +listings +luz +supplementary +tianjin +##acing +enzo +jd +ric +scanner +croats +transcribed +##49 +arden +cv +##hair +##raphy +##lver +##uy +357 +seventies +staggering +alam +horticultural +hs +regression +timbers +blasting +##ounded +montagu +manipulating +##cit +catalytic +1550 +troopers +##meo +condemnation +fitzpatrick +##oire +##roved +inexperienced +1670 +castes +##lative +outing +314 +dubois +flicking +quarrel +ste +learners +1625 +iq +whistled +##class +282 +classify +tariffs +temperament +355 +folly +liszt +##yles +immersed +jordanian +ceasefire +apparel +extras +maru +fished +##bio +harta +stockport +assortment +craftsman +paralysis +transmitters +##cola +blindness +##wk +fatally +proficiency +solemnly +##orno +repairing +amore +groceries +ultraviolet +##chase +schoolhouse +##tua +resurgence +nailed +##otype +##× +ruse +saliva +diagrams +##tructing +albans +rann +thirties +1b +antennas +hilarious +cougars +paddington +stats +##eger +breakaway +ipod +reza +authorship +prohibiting +scoffed +##etz +##ttle +conscription 
+defected +trondheim +##fires +ivanov +keenan +##adan +##ciful +##fb +##slow +locating +##ials +##tford +cadiz +basalt +blankly +interned +rags +rattling +##tick +carpathian +reassured +sync +bum +guildford +iss +staunch +##onga +astronomers +sera +sofie +emergencies +susquehanna +##heard +duc +mastery +vh1 +williamsburg +bayer +buckled +craving +##khan +##rdes +bloomington +##write +alton +barbecue +##bians +justine +##hri +##ndt +delightful +smartphone +newtown +photon +retrieval +peugeot +hissing +##monium +##orough +flavors +lighted +relaunched +tainted +##games +##lysis +anarchy +microscopic +hopping +adept +evade +evie +##beau +inhibit +sinn +adjustable +hurst +intuition +wilton +cisco +44th +lawful +lowlands +stockings +thierry +##dalen +##hila +##nai +fates +prank +tb +maison +lobbied +provocative +1724 +4a +utopia +##qual +carbonate +gujarati +purcell +##rford +curtiss +##mei +overgrown +arenas +mediation +swallows +##rnik +respectful +turnbull +##hedron +##hope +alyssa +ozone +##ʻi +ami +gestapo +johansson +snooker +canteen +cuff +declines +empathy +stigma +##ags +##iner +##raine +taxpayers +gui +volga +##wright +##copic +lifespan +overcame +tattooed +enactment +giggles +##ador +##camp +barrington +bribe +obligatory +orbiting +peng +##enas +elusive +sucker +##vating +cong +hardship +empowered +anticipating +estrada +cryptic +greasy +detainees +planck +sudbury +plaid +dod +marriott +kayla +##ears +##vb +##zd +mortally +##hein +cognition +radha +319 +liechtenstein +meade +richly +argyle +harpsichord +liberalism +trumpets +lauded +tyrant +salsa +tiled +lear +promoters +reused +slicing +trident +##chuk +##gami +##lka +cantor +checkpoint +##points +gaul +leger +mammalian +##tov +##aar +##schaft +doha +frenchman +nirvana +##vino +delgado +headlining +##eron +##iography +jug +tko +1649 +naga +intersections +##jia +benfica +nawab +##suka +ashford +gulp +##deck +##vill +##rug +brentford +frazier +pleasures +dunne +potsdam +shenzhen +dentistry +##tec +flanagan +##dorff +##hear +chorale +dinah +prem +quezon +##rogated +relinquished +sutra +terri +##pani +flaps +##rissa +poly +##rnet +homme +aback +##eki +linger +womb +##kson +##lewood +doorstep +orthodoxy +threaded +westfield +##rval +dioceses +fridays +subsided +##gata +loyalists +##biotic +##ettes +letterman +lunatic +prelate +tenderly +invariably +souza +thug +winslow +##otide +furlongs +gogh +jeopardy +##runa +pegasus +##umble +humiliated +standalone +tagged +##roller +freshmen +klan +##bright +attaining +initiating +transatlantic +logged +viz +##uance +1723 +combatants +intervening +stephane +chieftain +despised +grazed +317 +cdc +galveston +godzilla +macro +simulate +##planes +parades +##esses +960 +##ductive +##unes +equator +overdose +##cans +##hosh +##lifting +joshi +epstein +sonora +treacherous +aquatics +manchu +responsive +##sation +supervisory +##christ +##llins +##ibar +##balance +##uso +kimball +karlsruhe +mab +##emy +ignores +phonetic +reuters +spaghetti +820 +almighty +danzig +rumbling +tombstone +designations +lured +outset +##felt +supermarkets +##wt +grupo +kei +kraft +susanna +##blood +comprehension +genealogy +##aghan +##verted +redding +##ythe +1722 +bowing +##pore +##roi +lest +sharpened +fulbright +valkyrie +sikhs +##unds +swans +bouquet +merritt +##tage +##venting +commuted +redhead +clerks +leasing +cesare +dea +hazy +##vances +fledged +greenfield +servicemen +##gical +armando +blackout +dt +sagged +downloadable +intra +potion +pods +##4th +##mism +xp +attendants +gambia +stale +##ntine +plump +asteroids 
+rediscovered +buds +flea +hive +##neas +1737 +classifications +debuts +##eles +olympus +scala +##eurs +##gno +##mute +hummed +sigismund +visuals +wiggled +await +pilasters +clench +sulfate +##ances +bellevue +enigma +trainee +snort +##sw +clouded +denim +##rank +##rder +churning +hartman +lodges +riches +sima +##missible +accountable +socrates +regulates +mueller +##cr +1702 +avoids +solids +himalayas +nutrient +pup +##jevic +squat +fades +nec +##lates +##pina +##rona +##ου +privateer +tequila +##gative +##mpton +apt +hornet +immortals +##dou +asturias +cleansing +dario +##rries +##anta +etymology +servicing +zhejiang +##venor +##nx +horned +erasmus +rayon +relocating +£10 +##bags +escalated +promenade +stubble +2010s +artisans +axial +liquids +mora +sho +yoo +##tsky +bundles +oldies +##nally +notification +bastion +##ths +sparkle +##lved +1728 +leash +pathogen +highs +##hmi +immature +880 +gonzaga +ignatius +mansions +monterrey +sweets +bryson +##loe +polled +regatta +brightest +pei +rosy +squid +hatfield +payroll +addict +meath +cornerback +heaviest +lodging +##mage +capcom +rippled +##sily +barnet +mayhem +ymca +snuggled +rousseau +##cute +blanchard +284 +fragmented +leighton +chromosomes +risking +##md +##strel +##utter +corinne +coyotes +cynical +hiroshi +yeomanry +##ractive +ebook +grading +mandela +plume +agustin +magdalene +##rkin +bea +femme +trafford +##coll +##lun +##tance +52nd +fourier +upton +##mental +camilla +gust +iihf +islamabad +longevity +##kala +feldman +netting +##rization +endeavour +foraging +mfa +orr +##open +greyish +contradiction +graz +##ruff +handicapped +marlene +tweed +oaxaca +spp +campos +miocene +pri +configured +cooks +pluto +cozy +pornographic +##entes +70th +fairness +glided +jonny +lynne +rounding +sired +##emon +##nist +remade +uncover +##mack +complied +lei +newsweek +##jured +##parts +##enting +##pg +293 +finer +guerrillas +athenian +deng +disused +stepmother +accuse +gingerly +seduction +521 +confronting +##walker +##going +gora +nostalgia +sabres +virginity +wrenched +##minated +syndication +wielding +eyre +##56 +##gnon +##igny +behaved +taxpayer +sweeps +##growth +childless +gallant +##ywood +amplified +geraldine +scrape +##ffi +babylonian +fresco +##rdan +##kney +##position +1718 +restricting +tack +fukuoka +osborn +selector +partnering +##dlow +318 +gnu +kia +tak +whitley +gables +##54 +##mania +mri +softness +immersion +##bots +##evsky +1713 +chilling +insignificant +pcs +##uis +elites +lina +purported +supplemental +teaming +##americana +##dding +##inton +proficient +rouen +##nage +##rret +niccolo +selects +##bread +fluffy +1621 +gruff +knotted +mukherjee +polgara +thrash +nicholls +secluded +smoothing +thru +corsica +loaf +whitaker +inquiries +##rrier +##kam +indochina +289 +marlins +myles +peking +##tea +extracts +pastry +superhuman +connacht +vogel +##ditional +##het +##udged +##lash +gloss +quarries +refit +teaser +##alic +##gaon +20s +materialized +sling +camped +pickering +tung +tracker +pursuant +##cide +cranes +soc +##cini +##typical +##viere +anhalt +overboard +workout +chores +fares +orphaned +stains +##logie +fenton +surpassing +joyah +triggers +##itte +grandmaster +##lass +##lists +clapping +fraudulent +ledger +nagasaki +##cor +##nosis +##tsa +eucalyptus +tun +##icio +##rney +##tara +dax +heroism +ina +wrexham +onboard +unsigned +##dates +moshe +galley +winnie +droplets +exiles +praises +watered +noodles +##aia +fein +adi +leland +multicultural +stink +bingo +comets +erskine +modernized +canned +constraint +domestically 
+chemotherapy +featherweight +stifled +##mum +darkly +irresistible +refreshing +hasty +isolate +##oys +kitchener +planners +##wehr +cages +yarn +implant +toulon +elects +childbirth +yue +##lind +##lone +cn +rightful +sportsman +junctions +remodeled +specifies +##rgh +291 +##oons +complimented +##urgent +lister +ot +##logic +bequeathed +cheekbones +fontana +gabby +##dial +amadeus +corrugated +maverick +resented +triangles +##hered +##usly +nazareth +tyrol +1675 +assent +poorer +sectional +aegean +##cous +296 +nylon +ghanaian +##egorical +##weig +cushions +forbid +fusiliers +obstruction +somerville +##scia +dime +earrings +elliptical +leyte +oder +polymers +timmy +atm +midtown +piloted +settles +continual +externally +mayfield +##uh +enrichment +henson +keane +persians +1733 +benji +braden +pep +324 +##efe +contenders +pepsi +valet +##isches +298 +##asse +##earing +goofy +stroll +##amen +authoritarian +occurrences +adversary +ahmedabad +tangent +toppled +dorchester +1672 +modernism +marxism +islamist +charlemagne +exponential +racks +unicode +brunette +mbc +pic +skirmish +##bund +##lad +##powered +##yst +hoisted +messina +shatter +##ctum +jedi +vantage +##music +##neil +clemens +mahmoud +corrupted +authentication +lowry +nils +##washed +omnibus +wounding +jillian +##itors +##opped +serialized +narcotics +handheld +##arm +##plicity +intersecting +stimulating +##onis +crate +fellowships +hemingway +casinos +climatic +fordham +copeland +drip +beatty +leaflets +robber +brothel +madeira +##hedral +sphinx +ultrasound +##vana +valor +forbade +leonid +villas +##aldo +duane +marquez +##cytes +disadvantaged +forearms +kawasaki +reacts +consular +lax +uncles +uphold +##hopper +concepcion +dorsey +lass +##izan +arching +passageway +1708 +researches +tia +internationals +##graphs +##opers +distinguishes +javanese +divert +##uven +plotted +##listic +##rwin +##erik +##tify +affirmative +signifies +validation +##bson +kari +felicity +georgina +zulu +##eros +##rained +##rath +overcoming +##dot +argyll +##rbin +1734 +chiba +ratification +windy +earls +parapet +##marks +hunan +pristine +astrid +punta +##gart +brodie +##kota +##oder +malaga +minerva +rouse +##phonic +bellowed +pagoda +portals +reclamation +##gur +##odies +##⁄₄ +parentheses +quoting +allergic +palette +showcases +benefactor +heartland +nonlinear +##tness +bladed +cheerfully +scans +##ety +##hone +1666 +girlfriends +pedersen +hiram +sous +##liche +##nator +1683 +##nery +##orio +##umen +bobo +primaries +smiley +##cb +unearthed +uniformly +fis +metadata +1635 +ind +##oted +recoil +##titles +##tura +##ια +406 +hilbert +jamestown +mcmillan +tulane +seychelles +##frid +antics +coli +fated +stucco +##grants +1654 +bulky +accolades +arrays +caledonian +carnage +optimism +puebla +##tative +##cave +enforcing +rotherham +seo +dunlop +aeronautics +chimed +incline +zoning +archduke +hellenistic +##oses +##sions +candi +thong +##ople +magnate +rustic +##rsk +projective +slant +##offs +danes +hollis +vocalists +##ammed +congenital +contend +gesellschaft +##ocating +##pressive +douglass +quieter +##cm +##kshi +howled +salim +spontaneously +townsville +buena +southport +##bold +kato +1638 +faerie +stiffly +##vus +##rled +297 +flawless +realising +taboo +##7th +bytes +straightening +356 +jena +##hid +##rmin +cartwright +berber +bertram +soloists +411 +noses +417 +coping +fission +hardin +inca +##cen +1717 +mobilized +vhf +##raf +biscuits +curate +##85 +##anial +331 +gaunt +neighbourhoods +1540 +##abas +blanca +bypassed +sockets +behold +coincidentally +##bane 
+nara +shave +splinter +terrific +##arion +##erian +commonplace +juris +redwood +waistband +boxed +caitlin +fingerprints +jennie +naturalized +##ired +balfour +craters +jody +bungalow +hugely +quilt +glitter +pigeons +undertaker +bulging +constrained +goo +##sil +##akh +assimilation +reworked +##person +persuasion +##pants +felicia +##cliff +##ulent +1732 +explodes +##dun +##inium +##zic +lyman +vulture +hog +overlook +begs +northwards +ow +spoil +##urer +fatima +favorably +accumulate +sargent +sorority +corresponded +dispersal +kochi +toned +##imi +##lita +internacional +newfound +##agger +##lynn +##rigue +booths +peanuts +##eborg +medicare +muriel +nur +##uram +crates +millennia +pajamas +worsened +##breakers +jimi +vanuatu +yawned +##udeau +carousel +##hony +hurdle +##ccus +##mounted +##pod +rv +##eche +airship +ambiguity +compulsion +recapture +##claiming +arthritis +##osomal +1667 +asserting +ngc +sniffing +dade +discontent +glendale +ported +##amina +defamation +rammed +##scent +fling +livingstone +##fleet +875 +##ppy +apocalyptic +comrade +lcd +##lowe +cessna +eine +persecuted +subsistence +demi +hoop +reliefs +710 +coptic +progressing +stemmed +perpetrators +1665 +priestess +##nio +dobson +ebony +rooster +itf +tortricidae +##bbon +##jian +cleanup +##jean +##øy +1721 +eighties +taxonomic +holiness +##hearted +##spar +antilles +showcasing +stabilized +##nb +gia +mascara +michelangelo +dawned +##uria +##vinsky +extinguished +fitz +grotesque +£100 +##fera +##loid +##mous +barges +neue +throbbed +cipher +johnnie +##a1 +##mpt +outburst +##swick +spearheaded +administrations +c1 +heartbreak +pixels +pleasantly +##enay +lombardy +plush +##nsed +bobbie +##hly +reapers +tremor +xiang +minogue +substantive +hitch +barak +##wyl +kwan +##encia +910 +obscene +elegance +indus +surfer +bribery +conserve +##hyllum +##masters +horatio +##fat +apes +rebound +psychotic +##pour +iteration +##mium +##vani +botanic +horribly +antiques +dispose +paxton +##hli +##wg +timeless +1704 +disregard +engraver +hounds +##bau +##version +looted +uno +facilitates +groans +masjid +rutland +antibody +disqualification +decatur +footballers +quake +slacks +48th +rein +scribe +stabilize +commits +exemplary +tho +##hort +##chison +pantry +traversed +##hiti +disrepair +identifiable +vibrated +baccalaureate +##nnis +csa +interviewing +##iensis +##raße +greaves +wealthiest +343 +classed +jogged +£5 +##58 +##atal +illuminating +knicks +respecting +##uno +scrubbed +##iji +##dles +kruger +moods +growls +raider +silvia +chefs +kam +vr +cree +percival +##terol +gunter +counterattack +defiant +henan +ze +##rasia +##riety +equivalence +submissions +##fra +##thor +bautista +mechanically +##heater +cornice +herbal +templar +##mering +outputs +ruining +ligand +renumbered +extravagant +mika +blockbuster +eta +insurrection +##ilia +darkening +ferocious +pianos +strife +kinship +##aer +melee +##anor +##iste +##may +##oue +decidedly +weep +##jad +##missive +##ppel +354 +puget +unease +##gnant +1629 +hammering +kassel +ob +wessex +##lga +bromwich +egan +paranoia +utilization +##atable +##idad +contradictory +provoke +##ols +##ouring +##tangled +knesset +##very +##lette +plumbing +##sden +##¹ +greensboro +occult +sniff +338 +zev +beaming +gamer +haggard +mahal +##olt +##pins +mendes +utmost +briefing +gunnery +##gut +##pher +##zh +##rok +1679 +khalifa +sonya +##boot +principals +urbana +wiring +##liffe +##minating +##rrado +dahl +nyu +skepticism +np +townspeople +ithaca +lobster +somethin +##fur +##arina +##−1 +freighter +zimmerman +biceps 
+contractual +##herton +amend +hurrying +subconscious +##anal +336 +meng +clermont +spawning +##eia +##lub +dignitaries +impetus +snacks +spotting +twigs +##bilis +##cz +##ouk +libertadores +nic +skylar +##aina +##firm +gustave +asean +##anum +dieter +legislatures +flirt +bromley +trolls +umar +##bbies +##tyle +blah +parc +bridgeport +crank +negligence +##nction +46th +constantin +molded +bandages +seriousness +00pm +siegel +carpets +compartments +upbeat +statehood +##dner +##edging +marko +730 +platt +##hane +paving +##iy +1738 +abbess +impatience +limousine +nbl +##talk +441 +lucille +mojo +nightfall +robbers +##nais +karel +brisk +calves +replicate +ascribed +telescopes +##olf +intimidated +##reen +ballast +specialization +##sit +aerodynamic +caliphate +rainer +visionary +##arded +epsilon +##aday +##onte +aggregation +auditory +boosted +reunification +kathmandu +loco +robyn +402 +acknowledges +appointing +humanoid +newell +redeveloped +restraints +##tained +barbarians +chopper +1609 +italiana +##lez +##lho +investigates +wrestlemania +##anies +##bib +690 +##falls +creaked +dragoons +gravely +minions +stupidity +volley +##harat +##week +musik +##eries +##uously +fungal +massimo +semantics +malvern +##ahl +##pee +discourage +embryo +imperialism +1910s +profoundly +##ddled +jiangsu +sparkled +stat +##holz +sweatshirt +tobin +##iction +sneered +##cheon +##oit +brit +causal +smyth +##neuve +diffuse +perrin +silvio +##ipes +##recht +detonated +iqbal +selma +##nism +##zumi +roasted +##riders +tay +##ados +##mament +##mut +##rud +840 +completes +nipples +cfa +flavour +hirsch +##laus +calderon +sneakers +moravian +##ksha +1622 +rq +294 +##imeters +bodo +##isance +##pre +##ronia +anatomical +excerpt +##lke +dh +kunst +##tablished +##scoe +biomass +panted +unharmed +gael +housemates +montpellier +##59 +coa +rodents +tonic +hickory +singleton +##taro +451 +1719 +aldo +breaststroke +dempsey +och +rocco +##cuit +merton +dissemination +midsummer +serials +##idi +haji +polynomials +##rdon +gs +enoch +prematurely +shutter +taunton +£3 +##grating +##inates +archangel +harassed +##asco +326 +archway +dazzling +##ecin +1736 +sumo +wat +##kovich +1086 +honneur +##ently +##nostic +##ttal +##idon +1605 +403 +1716 +blogger +rents +##gnan +hires +##ikh +##dant +howie +##rons +handler +retracted +shocks +1632 +arun +duluth +kepler +trumpeter +##lary +peeking +seasoned +trooper +##mara +laszlo +##iciencies +##rti +heterosexual +##inatory +##ssion +indira +jogging +##inga +##lism +beit +dissatisfaction +malice +##ately +nedra +peeling +##rgeon +47th +stadiums +475 +vertigo +##ains +iced +restroom +##plify +##tub +illustrating +pear +##chner +##sibility +inorganic +rappers +receipts +watery +##kura +lucinda +##oulos +reintroduced +##8th +##tched +gracefully +saxons +nutritional +wastewater +rained +favourites +bedrock +fisted +hallways +likeness +upscale +##lateral +1580 +blinds +prequel +##pps +##tama +deter +humiliating +restraining +tn +vents +1659 +laundering +recess +rosary +tractors +coulter +federer +##ifiers +##plin +persistence +##quitable +geschichte +pendulum +quakers +##beam +bassett +pictorial +buffet +koln +##sitor +drills +reciprocal +shooters +##57 +##cton +##tees +converge +pip +dmitri +donnelly +yamamoto +aqua +azores +demographics +hypnotic +spitfire +suspend +wryly +roderick +##rran +sebastien +##asurable +mavericks +##fles +##200 +himalayan +prodigy +##iance +transvaal +demonstrators +handcuffs +dodged +mcnamara +sublime +1726 +crazed +##efined +##till +ivo +pondered +reconciled +shrill +sava 
+##duk +bal +cad +heresy +jaipur +goran +##nished +341 +lux +shelly +whitehall +##hre +israelis +peacekeeping +##wled +1703 +demetrius +ousted +##arians +##zos +beale +anwar +backstroke +raged +shrinking +cremated +##yck +benign +towing +wadi +darmstadt +landfill +parana +soothe +colleen +sidewalks +mayfair +tumble +hepatitis +ferrer +superstructure +##gingly +##urse +##wee +anthropological +translators +##mies +closeness +hooves +##pw +mondays +##roll +##vita +landscaping +##urized +purification +sock +thorns +thwarted +jalan +tiberius +##taka +saline +##rito +confidently +khyber +sculptors +##ij +brahms +hammersmith +inspectors +battista +fivb +fragmentation +hackney +##uls +arresting +exercising +antoinette +bedfordshire +##zily +dyed +##hema +1656 +racetrack +variability +##tique +1655 +austrians +deteriorating +madman +theorists +aix +lehman +weathered +1731 +decreed +eruptions +1729 +flaw +quinlan +sorbonne +flutes +nunez +1711 +adored +downwards +fable +rasped +1712 +moritz +mouthful +renegade +shivers +stunts +dysfunction +restrain +translit +327 +pancakes +##avio +##cision +##tray +351 +vial +##lden +bain +##maid +##oxide +chihuahua +malacca +vimes +##rba +##rnier +1664 +donnie +plaques +##ually +337 +bangs +floppy +huntsville +loretta +nikolay +##otte +eater +handgun +ubiquitous +##hett +eras +zodiac +1634 +##omorphic +1820s +##zog +cochran +##bula +##lithic +warring +##rada +dalai +excused +blazers +mcconnell +reeling +bot +este +##abi +geese +hoax +taxon +##bla +guitarists +##icon +condemning +hunts +inversion +moffat +taekwondo +##lvis +1624 +stammered +##rest +##rzy +sousa +fundraiser +marylebone +navigable +uptown +cabbage +daniela +salman +shitty +whimper +##kian +##utive +programmers +protections +rm +##rmi +##rued +forceful +##enes +fuss +##tao +##wash +brat +oppressive +reykjavik +spartak +ticking +##inkles +##kiewicz +adolph +horst +maui +protege +straighten +cpc +landau +concourse +clements +resultant +##ando +imaginative +joo +reactivated +##rem +##ffled +##uising +consultative +##guide +flop +kaitlyn +mergers +parenting +somber +##vron +supervise +vidhan +##imum +courtship +exemplified +harmonies +medallist +refining +##rrow +##ка +amara +##hum +780 +goalscorer +sited +overshadowed +rohan +displeasure +secretive +multiplied +osman +##orth +engravings +padre +##kali +##veda +miniatures +mis +##yala +clap +pali +rook +##cana +1692 +57th +antennae +astro +oskar +1628 +bulldog +crotch +hackett +yucatan +##sure +amplifiers +brno +ferrara +migrating +##gree +thanking +turing +##eza +mccann +ting +andersson +onslaught +gaines +ganga +incense +standardization +##mation +sentai +scuba +stuffing +turquoise +waivers +alloys +##vitt +regaining +vaults +##clops +##gizing +digger +furry +memorabilia +probing +##iad +payton +rec +deutschland +filippo +opaque +seamen +zenith +afrikaans +##filtration +disciplined +inspirational +##merie +banco +confuse +grafton +tod +##dgets +championed +simi +anomaly +biplane +##ceptive +electrode +##para +1697 +cleavage +crossbow +swirl +informant +##lars +##osta +afi +bonfire +spec +##oux +lakeside +slump +##culus +##lais +##qvist +##rrigan +1016 +facades +borg +inwardly +cervical +xl +pointedly +050 +stabilization +##odon +chests +1699 +hacked +ctv +orthogonal +suzy +##lastic +gaulle +jacobite +rearview +##cam +##erted +ashby +##drik +##igate +##mise +##zbek +affectionately +canine +disperse +latham +##istles +##ivar +spielberg +##orin +##idium +ezekiel +cid +##sg +durga +middletown +##cina +customized +frontiers +harden +##etano +##zzy +1604 
+bolsheviks +##66 +coloration +yoko +##bedo +briefs +slabs +debra +liquidation +plumage +##oin +blossoms +dementia +subsidy +1611 +proctor +relational +jerseys +parochial +ter +##ici +esa +peshawar +cavalier +loren +cpi +idiots +shamrock +1646 +dutton +malabar +mustache +##endez +##ocytes +referencing +terminates +marche +yarmouth +##sop +acton +mated +seton +subtly +baptised +beige +extremes +jolted +kristina +telecast +##actic +safeguard +waldo +##baldi +##bular +endeavors +sloppy +subterranean +##ensburg +##itung +delicately +pigment +tq +##scu +1626 +##ound +collisions +coveted +herds +##personal +##meister +##nberger +chopra +##ricting +abnormalities +defective +galician +lucie +##dilly +alligator +likened +##genase +burundi +clears +complexion +derelict +deafening +diablo +fingered +champaign +dogg +enlist +isotope +labeling +mrna +##erre +brilliance +marvelous +##ayo +1652 +crawley +ether +footed +dwellers +deserts +hamish +rubs +warlock +skimmed +##lizer +870 +buick +embark +heraldic +irregularities +##ajan +kiara +##kulam +##ieg +antigen +kowalski +##lge +oakley +visitation +##mbit +vt +##suit +1570 +murderers +##miento +##rites +chimneys +##sling +condemn +custer +exchequer +havre +##ghi +fluctuations +##rations +dfb +hendricks +vaccines +##tarian +nietzsche +biking +juicy +##duced +brooding +scrolling +selangor +##ragan +352 +annum +boomed +seminole +sugarcane +##dna +departmental +dismissing +innsbruck +arteries +ashok +batavia +daze +kun +overtook +##rga +##tlan +beheaded +gaddafi +holm +electronically +faulty +galilee +fractures +kobayashi +##lized +gunmen +magma +aramaic +mala +eastenders +inference +messengers +bf +##qu +407 +bathrooms +##vere +1658 +flashbacks +ideally +misunderstood +##jali +##weather +mendez +##grounds +505 +uncanny +##iii +1709 +friendships +##nbc +sacrament +accommodated +reiterated +logistical +pebbles +thumped +##escence +administering +decrees +drafts +##flight +##cased +##tula +futuristic +picket +intimidation +winthrop +##fahan +interfered +339 +afar +francoise +morally +uta +cochin +croft +dwarfs +##bruck +##dents +##nami +biker +##hner +##meral +nano +##isen +##ometric +##pres +##ан +brightened +meek +parcels +securely +gunners +##jhl +##zko +agile +hysteria +##lten +##rcus +bukit +champs +chevy +cuckoo +leith +sadler +theologians +welded +##section +1663 +jj +plurality +xander +##rooms +##formed +shredded +temps +intimately +pau +tormented +##lok +##stellar +1618 +charred +ems +essen +##mmel +alarms +spraying +ascot +blooms +twinkle +##abia +##apes +internment +obsidian +##chaft +snoop +##dav +##ooping +malibu +##tension +quiver +##itia +hays +mcintosh +travers +walsall +##ffie +1623 +beverley +schwarz +plunging +structurally +m3 +rosenthal +vikram +##tsk +770 +ghz +##onda +##tiv +chalmers +groningen +pew +reckon +unicef +##rvis +55th +##gni +1651 +sulawesi +avila +cai +metaphysical +screwing +turbulence +##mberg +augusto +samba +56th +baffled +momentary +toxin +##urian +##wani +aachen +condoms +dali +steppe +##3d +##app +##oed +##year +adolescence +dauphin +electrically +inaccessible +microscopy +nikita +##ega +atv +##cel +##enter +##oles +##oteric +##ы +accountants +punishments +wrongly +bribes +adventurous +clinch +flinders +southland +##hem +##kata +gough +##ciency +lads +soared +##ה +undergoes +deformation +outlawed +rubbish +##arus +##mussen +##nidae +##rzburg +arcs +##ingdon +##tituted +1695 +wheelbase +wheeling +bombardier +campground +zebra +##lices +##oj +##bain +lullaby +##ecure +donetsk +wylie +grenada +##arding +##ης +squinting 
+eireann +opposes +##andra +maximal +runes +##broken +##cuting +##iface +##ror +##rosis +additive +britney +adultery +triggering +##drome +detrimental +aarhus +containment +jc +swapped +vichy +##ioms +madly +##oric +##rag +brant +##ckey +##trix +1560 +1612 +broughton +rustling +##stems +##uder +asbestos +mentoring +##nivorous +finley +leaps +##isan +apical +pry +slits +substitutes +##dict +intuitive +fantasia +insistent +unreasonable +##igen +##vna +domed +hannover +margot +ponder +##zziness +impromptu +jian +lc +rampage +stemming +##eft +andrey +gerais +whichever +amnesia +appropriated +anzac +clicks +modifying +ultimatum +cambrian +maids +verve +yellowstone +##mbs +conservatoire +##scribe +adherence +dinners +spectra +imperfect +mysteriously +sidekick +tatar +tuba +##aks +##ifolia +distrust +##athan +##zle +c2 +ronin +zac +##pse +celaena +instrumentalist +scents +skopje +##mbling +comical +compensated +vidal +condor +intersect +jingle +wavelengths +##urrent +mcqueen +##izzly +carp +weasel +422 +kanye +militias +postdoctoral +eugen +gunslinger +##ɛ +faux +hospice +##for +appalled +derivation +dwarves +##elis +dilapidated +##folk +astoria +philology +##lwyn +##otho +##saka +inducing +philanthropy +##bf +##itative +geek +markedly +sql +##yce +bessie +indices +rn +##flict +495 +frowns +resolving +weightlifting +tugs +cleric +contentious +1653 +mania +rms +##miya +##reate +##ruck +##tucket +bien +eels +marek +##ayton +##cence +discreet +unofficially +##ife +leaks +##bber +1705 +332 +dung +compressor +hillsborough +pandit +shillings +distal +##skin +381 +##tat +##you +nosed +##nir +mangrove +undeveloped +##idia +textures +##inho +##500 +##rise +ae +irritating +nay +amazingly +bancroft +apologetic +compassionate +kata +symphonies +##lovic +airspace +##lch +930 +gifford +precautions +fulfillment +sevilla +vulgar +martinique +##urities +looting +piccolo +tidy +##dermott +quadrant +armchair +incomes +mathematicians +stampede +nilsson +##inking +##scan +foo +quarterfinal +##ostal +shang +shouldered +squirrels +##owe +344 +vinegar +##bner +##rchy +##systems +delaying +##trics +ars +dwyer +rhapsody +sponsoring +##gration +bipolar +cinder +starters +##olio +##urst +421 +signage +##nty +aground +figurative +mons +acquaintances +duets +erroneously +soyuz +elliptic +recreated +##cultural +##quette +##ssed +##tma +##zcz +moderator +scares +##itaire +##stones +##udence +juniper +sighting +##just +##nsen +britten +calabria +ry +bop +cramer +forsyth +stillness +##л +airmen +gathers +unfit +##umber +##upt +taunting +##rip +seeker +streamlined +##bution +holster +schumann +tread +vox +##gano +##onzo +strive +dil +reforming +covent +newbury +predicting +##orro +decorate +tre +##puted +andover +ie +asahi +dept +dunkirk +gills +##tori +buren +huskies +##stis +##stov +abstracts +bets +loosen +##opa +1682 +yearning +##glio +##sir +berman +effortlessly +enamel +napoli +persist +##peration +##uez +attache +elisa +b1 +invitations +##kic +accelerating +reindeer +boardwalk +clutches +nelly +polka +starbucks +##kei +adamant +huey +lough +unbroken +adventurer +embroidery +inspecting +stanza +##ducted +naia +taluka +##pone +##roids +chases +deprivation +florian +##jing +##ppet +earthly +##lib +##ssee +colossal +foreigner +vet +freaks +patrice +rosewood +triassic +upstate +##pkins +dominates +ata +chants +ks +vo +##400 +##bley +##raya +##rmed +555 +agra +infiltrate +##ailing +##ilation +##tzer +##uppe +##werk +binoculars +enthusiast +fujian +squeak +##avs +abolitionist +almeida +boredom +hampstead +marsden +rations +##ands 
+inflated +334 +bonuses +rosalie +patna +##rco +329 +detachments +penitentiary +54th +flourishing +woolf +##dion +##etched +papyrus +##lster +##nsor +##toy +bobbed +dismounted +endelle +inhuman +motorola +tbs +wince +wreath +##ticus +hideout +inspections +sanjay +disgrace +infused +pudding +stalks +##urbed +arsenic +leases +##hyl +##rrard +collarbone +##waite +##wil +dowry +##bant +##edance +genealogical +nitrate +salamanca +scandals +thyroid +necessitated +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##` +##{ +##| +##} +##~ +##¡ +##¢ +##£ +##¤ +##¥ +##¦ +##§ +##¨ +##© +##ª +##« +##¬ +##® +##± +##´ +##µ +##¶ +##· +##º +##» +##¼ +##¾ +##¿ +##æ +##ð +##÷ +##þ +##đ +##ħ +##ŋ +##œ +##ƒ +##ɐ +##ɑ +##ɒ +##ɔ +##ɕ +##ə +##ɡ +##ɣ +##ɨ +##ɪ +##ɫ +##ɬ +##ɯ +##ɲ +##ɴ +##ɹ +##ɾ +##ʀ +##ʁ +##ʂ +##ʃ +##ʉ +##ʊ +##ʋ +##ʌ +##ʎ +##ʐ +##ʑ +##ʒ +##ʔ +##ʰ +##ʲ +##ʳ +##ʷ +##ʸ +##ʻ +##ʼ +##ʾ +##ʿ +##ˈ +##ˡ +##ˢ +##ˣ +##ˤ +##β +##γ +##δ +##ε +##ζ +##θ +##κ +##λ +##μ +##ξ +##ο +##π +##ρ +##σ +##τ +##υ +##φ +##χ +##ψ +##ω +##б +##г +##д +##ж +##з +##м +##п +##с +##у +##ф +##х +##ц +##ч +##ш +##щ +##ъ +##э +##ю +##ђ +##є +##і +##ј +##љ +##њ +##ћ +##ӏ +##ա +##բ +##գ +##դ +##ե +##թ +##ի +##լ +##կ +##հ +##մ +##յ +##ն +##ո +##պ +##ս +##վ +##տ +##ր +##ւ +##ք +##־ +##א +##ב +##ג +##ד +##ו +##ז +##ח +##ט +##י +##ך +##כ +##ל +##ם +##מ +##ן +##נ +##ס +##ע +##ף +##פ +##ץ +##צ +##ק +##ר +##ש +##ת +##، +##ء +##ب +##ت +##ث +##ج +##ح +##خ +##ذ +##ز +##س +##ش +##ص +##ض +##ط +##ظ +##ع +##غ +##ـ +##ف +##ق +##ك +##و +##ى +##ٹ +##پ +##چ +##ک +##گ +##ں +##ھ +##ہ +##ے +##अ +##आ +##उ +##ए +##क +##ख +##ग +##च +##ज +##ट +##ड +##ण +##त +##थ +##द +##ध +##न +##प +##ब +##भ +##म +##य +##र +##ल +##व +##श +##ष +##स +##ह +##ा +##ि +##ी +##ो +##। +##॥ +##ং +##অ +##আ +##ই +##উ +##এ +##ও +##ক +##খ +##গ +##চ +##ছ +##জ +##ট +##ড +##ণ +##ত +##থ +##দ +##ধ +##ন +##প +##ব +##ভ +##ম +##য +##র +##ল +##শ +##ষ +##স +##হ +##া +##ি +##ী +##ে +##க +##ச +##ட +##த +##ந +##ன +##ப +##ம +##ய +##ர +##ல +##ள +##வ +##ா +##ி +##ு +##ே +##ை +##ನ +##ರ +##ಾ +##ක +##ය +##ර +##ල +##ව +##ා +##ก +##ง +##ต +##ท +##น +##พ +##ม +##ย +##ร +##ล +##ว +##ส +##อ +##า +##เ +##་ +##། +##ག +##ང +##ད +##ན +##པ +##བ +##མ +##འ +##ར +##ལ +##ས +##မ +##ა +##ბ +##გ +##დ +##ე +##ვ +##თ +##ი +##კ +##ლ +##მ +##ნ +##ო +##რ +##ს +##ტ +##უ +##ᄀ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄉ +##ᄊ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅡ +##ᅢ +##ᅥ +##ᅦ +##ᅧ +##ᅩ +##ᅪ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᅵ +##ᆨ +##ᆫ +##ᆯ +##ᆷ +##ᆸ +##ᆼ +##ᴬ +##ᴮ +##ᴰ +##ᴵ +##ᴺ +##ᵀ +##ᵃ +##ᵇ +##ᵈ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵖ +##ᵗ +##ᵘ +##ᵣ +##ᵤ +##ᵥ +##ᶜ +##ᶠ +##‐ +##‑ +##‒ +##– +##— +##― +##‖ +##‘ +##’ +##‚ +##“ +##” +##„ +##† +##‡ +##• +##… +##‰ +##′ +##″ +##› +##‿ +##⁄ +##⁰ +##ⁱ +##⁴ +##⁵ +##⁶ +##⁷ +##⁸ +##⁹ +##⁻ +##ⁿ +##₅ +##₆ +##₇ +##₈ +##₉ +##₊ +##₍ +##₎ +##ₐ +##ₑ +##ₒ +##ₓ +##ₕ +##ₖ +##ₗ +##ₘ +##ₚ +##ₛ +##ₜ +##₤ +##₩ +##€ +##₱ +##₹ +##ℓ +##№ +##ℝ +##™ +##⅓ +##⅔ +##← +##↑ +##→ +##↓ +##↔ +##↦ +##⇄ +##⇌ +##⇒ +##∂ +##∅ +##∆ +##∇ +##∈ +##∗ +##∘ +##√ +##∞ +##∧ +##∨ +##∩ +##∪ +##≈ +##≡ +##≤ +##≥ +##⊂ +##⊆ +##⊕ +##⊗ +##⋅ +##─ +##│ +##■ +##▪ +##● +##★ +##☆ +##☉ +##♠ +##♣ +##♥ +##♦ +##♯ +##⟨ +##⟩ +##ⱼ +##⺩ +##⺼ +##⽥ +##、 +##。 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##〜 +##あ +##い +##う +##え +##お +##か +##き +##く +##け +##こ +##さ +##し +##す +##せ +##そ +##た +##ち +##っ +##つ +##て +##と +##な +##に +##ぬ +##ね +##の +##は +##ひ +##ふ +##へ +##ほ +##ま +##み +##む +##め +##も +##や +##ゆ +##よ +##ら +##り +##る +##れ +##ろ +##を +##ん +##ァ +##ア +##ィ +##イ +##ウ +##ェ +##エ +##オ +##カ +##キ +##ク +##ケ 
+##コ +##サ +##シ +##ス +##セ +##タ +##チ +##ッ +##ツ +##テ +##ト +##ナ +##ニ +##ノ +##ハ +##ヒ +##フ +##ヘ +##ホ +##マ +##ミ +##ム +##メ +##モ +##ャ +##ュ +##ョ +##ラ +##リ +##ル +##レ +##ロ +##ワ +##ン +##・ +##ー +##一 +##三 +##上 +##下 +##不 +##世 +##中 +##主 +##久 +##之 +##也 +##事 +##二 +##五 +##井 +##京 +##人 +##亻 +##仁 +##介 +##代 +##仮 +##伊 +##会 +##佐 +##侍 +##保 +##信 +##健 +##元 +##光 +##八 +##公 +##内 +##出 +##分 +##前 +##劉 +##力 +##加 +##勝 +##北 +##区 +##十 +##千 +##南 +##博 +##原 +##口 +##古 +##史 +##司 +##合 +##吉 +##同 +##名 +##和 +##囗 +##四 +##国 +##國 +##土 +##地 +##坂 +##城 +##堂 +##場 +##士 +##夏 +##外 +##大 +##天 +##太 +##夫 +##奈 +##女 +##子 +##学 +##宀 +##宇 +##安 +##宗 +##定 +##宣 +##宮 +##家 +##宿 +##寺 +##將 +##小 +##尚 +##山 +##岡 +##島 +##崎 +##川 +##州 +##巿 +##帝 +##平 +##年 +##幸 +##广 +##弘 +##張 +##彳 +##後 +##御 +##德 +##心 +##忄 +##志 +##忠 +##愛 +##成 +##我 +##戦 +##戸 +##手 +##扌 +##政 +##文 +##新 +##方 +##日 +##明 +##星 +##春 +##昭 +##智 +##曲 +##書 +##月 +##有 +##朝 +##木 +##本 +##李 +##村 +##東 +##松 +##林 +##森 +##楊 +##樹 +##橋 +##歌 +##止 +##正 +##武 +##比 +##氏 +##民 +##水 +##氵 +##氷 +##永 +##江 +##沢 +##河 +##治 +##法 +##海 +##清 +##漢 +##瀬 +##火 +##版 +##犬 +##王 +##生 +##田 +##男 +##疒 +##発 +##白 +##的 +##皇 +##目 +##相 +##省 +##真 +##石 +##示 +##社 +##神 +##福 +##禾 +##秀 +##秋 +##空 +##立 +##章 +##竹 +##糹 +##美 +##義 +##耳 +##良 +##艹 +##花 +##英 +##華 +##葉 +##藤 +##行 +##街 +##西 +##見 +##訁 +##語 +##谷 +##貝 +##貴 +##車 +##軍 +##辶 +##道 +##郎 +##郡 +##部 +##都 +##里 +##野 +##金 +##鈴 +##镇 +##長 +##門 +##間 +##阝 +##阿 +##陳 +##陽 +##雄 +##青 +##面 +##風 +##食 +##香 +##馬 +##高 +##龍 +##龸 +##fi +##fl +##! +##( +##) +##, +##- +##. +##/ +##: +##? +##~ diff --git a/yc2_univl/backup/pdvc/modules/cross-base/cross_config.json b/yc2_univl/backup/pdvc/modules/cross-base/cross_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a4807695d56a3aea97a55a9db97ba753e960748 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/cross-base/cross_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 1024, + "num_attention_heads": 12, + "num_hidden_layers": 2, + "vocab_size": 768 +} \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/modules/decoder-base/decoder_config.json b/yc2_univl/backup/pdvc/modules/decoder-base/decoder_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91c46b63eba081afb28085a6d53f390ada5a5cfe --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/decoder-base/decoder_config.json @@ -0,0 +1,14 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522, + "num_decoder_layers": 1, + "max_target_embeddings": 512 +} diff --git a/yc2_univl/backup/pdvc/modules/file_utils.py b/yc2_univl/backup/pdvc/modules/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..43fa8ca87e20ee5333dd84a09795a743bbf3f183 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/file_utils.py @@ -0,0 +1,239 @@ +""" +Utilities for working with the local dataset cache. +This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp +Copyright by the AllenNLP authors. 
+""" + +import os +import logging +import shutil +import tempfile +import json +from urllib.parse import urlparse +from pathlib import Path +from typing import Optional, Tuple, Union, IO, Callable, Set +from hashlib import sha256 +from functools import wraps + +from tqdm import tqdm + +import boto3 +from botocore.exceptions import ClientError +import requests + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + + +def filename_to_url(filename: str, cache_dir: Union[str, Path] = None) -> Tuple[str, str]: + """ + Return the url and etag (which may be ``None``) stored for `filename`. + Raise ``FileNotFoundError`` if `filename` or its stored metadata do not exist. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + cache_path = os.path.join(cache_dir, filename) + if not os.path.exists(cache_path): + raise FileNotFoundError("file {} not found".format(cache_path)) + + meta_path = cache_path + '.json' + if not os.path.exists(meta_path): + raise FileNotFoundError("file {} not found".format(meta_path)) + + with open(meta_path) as meta_file: + metadata = json.load(meta_file) + url = metadata['url'] + etag = metadata['etag'] + + return url, etag + + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. 
+ """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. + with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + + +def read_set_from_file(filename: str) -> Set[str]: + ''' + Extract a de-duped collection (set) of text from a file. + Expected file format is one item per line. 
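+    (for example, a stop-word list or a vocabulary file that stores one token per line)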
+    '''
+    collection = set()
+    with open(filename, 'r', encoding='utf-8') as file_:
+        for line in file_:
+            collection.add(line.rstrip())
+    return collection
+
+
+def get_file_extension(path: str, dot=True, lower: bool = True):
+    ext = os.path.splitext(path)[1]
+    ext = ext if dot else ext[1:]
+    return ext.lower() if lower else ext
diff --git a/yc2_univl/backup/pdvc/modules/modeling.py b/yc2_univl/backup/pdvc/modules/modeling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9551b488c16d04fad65dcdaeba7d73d7740f2902
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/modeling.py
@@ -0,0 +1,429 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn import CrossEntropyLoss, MSELoss
+
+from pdvc.modules.until_module import PreTrainedModel, LayerNorm, CrossEn, MILNCELoss, MaxMarginRankingLoss
+from pdvc.modules.module_bert import BertModel, BertConfig, BertOnlyMLMHead
+from pdvc.modules.module_visual import VisualModel, VisualConfig, VisualOnlyMLMHead
+from pdvc.modules.module_cross import CrossModel, CrossConfig
+from pdvc.modules.module_decoder import DecoderModel, DecoderConfig
+
+logger = logging.getLogger(__name__)
+
+
+class UniVLPreTrainedModel(PreTrainedModel, nn.Module):
+    """ An abstract class to handle weights initialization and
+        a simple interface for downloading and loading pretrained models.
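+        Concrete subclasses populate self.bert, self.visual, self.cross and
+        self.decoder, which are all initialized to None here.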
+ """ + def __init__(self, bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs): + # utilize bert config as base config + super(UniVLPreTrainedModel, self).__init__(bert_config) + self.bert_config = bert_config + self.visual_config = visual_config + self.cross_config = cross_config + self.decoder_config = decoder_config + + self.bert = None + self.visual = None + self.cross = None + self.decoder = None + + @classmethod + def from_pretrained(cls, pretrained_bert_name, visual_model_name, cross_model_name, decoder_model_name, + state_dict=None, cache_dir=None, type_vocab_size=2, *inputs, **kwargs): + + task_config = None + if "task_config" in kwargs.keys(): + task_config = kwargs["task_config"] + if not hasattr(task_config, "local_rank"): + task_config.__dict__["local_rank"] = 0 + elif task_config.local_rank == -1: + task_config.local_rank = 0 + print(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config) + bert_config, state_dict = BertConfig.get_config(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config=task_config) + visual_config, _ = VisualConfig.get_config(visual_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + cross_config, _ = CrossConfig.get_config(cross_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + decoder_config, _ = DecoderConfig.get_config(decoder_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + + model = cls(bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs) + + assert model.bert is not None + assert model.visual is not None + + if state_dict is not None: + model = cls.init_preweight(model, state_dict, task_config=task_config) + + return model + +class NormalizeVideo(nn.Module): + def __init__(self, task_config): + super(NormalizeVideo, self).__init__() + self.visual_norm2d = LayerNorm(task_config.video_dim) + + def forward(self, video): + video = torch.as_tensor(video).float() + video = video.view(-1, video.shape[-2], video.shape[-1]) + video = self.visual_norm2d(video) + return video + +def show_log(task_config, info): + if task_config is None or task_config.local_rank == 0: + logger.warning(info) + +def update_attr(target_name, target_config, target_attr_name, source_config, source_attr_name, default_value=None): + if hasattr(source_config, source_attr_name): + if default_value is None or getattr(source_config, source_attr_name) != default_value: + setattr(target_config, target_attr_name, getattr(source_config, source_attr_name)) + show_log(source_config, "Set {}.{}: {}.".format(target_name, + target_attr_name, getattr(target_config, target_attr_name))) + return target_config + +def check_attr(target_name, task_config): + return hasattr(task_config, target_name) and task_config.__dict__[target_name] + +class UniVL(UniVLPreTrainedModel): + def __init__(self, bert_config, visual_config, cross_config, decoder_config, task_config): + super(UniVL, self).__init__(bert_config, visual_config, cross_config, decoder_config) + self.task_config = task_config + self.ignore_video_index = -1 + + assert self.task_config.max_words <= bert_config.max_position_embeddings + assert self.task_config.max_words <= decoder_config.max_target_embeddings + assert self.task_config.max_frames <= visual_config.max_position_embeddings + assert self.task_config.max_words + self.task_config.max_frames <= cross_config.max_position_embeddings + + self._stage_one = True + self._stage_two = False + + if 
check_attr('stage_two', self.task_config): + self._stage_one = False + self._stage_two = self.task_config.stage_two + show_log(task_config, "Stage-One:{}, Stage-Two:{}".format(self._stage_one, self._stage_two)) + + self.train_sim_after_cross = False + if self._stage_one and check_attr('train_sim_after_cross', self.task_config): + self.train_sim_after_cross = True + show_log(task_config, "Test retrieval after cross encoder.") + + # Text Encoder ===> + bert_config = update_attr("bert_config", bert_config, "num_hidden_layers", + self.task_config, "text_num_hidden_layers") + # print('=================The bert config:==========/n',bert_config) + # print('=================The task config:==========/n',self.task_config) + self.bert = BertModel(bert_config) + bert_word_embeddings_weight = self.bert.embeddings.word_embeddings.weight + bert_position_embeddings_weight = self.bert.embeddings.position_embeddings.weight + # <=== End of Text Encoder + + # Video Encoder ===> + visual_config = update_attr("visual_config", visual_config, "num_hidden_layers", + self.task_config, "visual_num_hidden_layers") + self.visual = VisualModel(visual_config) + visual_word_embeddings_weight = self.visual.embeddings.word_embeddings.weight + # <=== End of Video Encoder + + if self._stage_one is False or self.train_sim_after_cross: + # Cross Encoder ===> + cross_config = update_attr("cross_config", cross_config, "num_hidden_layers", + self.task_config, "cross_num_hidden_layers") + self.cross = CrossModel(cross_config) + # <=== End of Cross Encoder + + if self.train_sim_after_cross is False: + # Decoder ===> + decoder_config = update_attr("decoder_config", decoder_config, "num_decoder_layers", + self.task_config, "decoder_num_hidden_layers") + self.decoder = DecoderModel(decoder_config, bert_word_embeddings_weight, bert_position_embeddings_weight) + # <=== End of Decoder + + if self.task_config.do_pretrain: + self.cls = BertOnlyMLMHead(bert_config, bert_word_embeddings_weight) + self.cls_visual = VisualOnlyMLMHead(visual_config, visual_word_embeddings_weight) + self.alm_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.similarity_dense = nn.Linear(bert_config.hidden_size, 1) + self.decoder_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.normalize_video = NormalizeVideo(task_config) + + mILNCELoss = MILNCELoss(batch_size=task_config.batch_size // task_config.n_gpu, n_pair=task_config.n_pair, ) + maxMarginRankingLoss = MaxMarginRankingLoss(margin=task_config.margin, + negative_weighting=task_config.negative_weighting, + batch_size=task_config.batch_size // task_config.n_gpu, + n_pair=task_config.n_pair, + hard_negative_rate=task_config.hard_negative_rate, ) + + if task_config.use_mil: + self.loss_fct = CrossEn() if self._stage_two else mILNCELoss + self._pretrain_sim_loss_fct = mILNCELoss + else: + self.loss_fct = CrossEn() if self._stage_two else maxMarginRankingLoss + self._pretrain_sim_loss_fct = maxMarginRankingLoss + + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids, attention_mask, video, video_mask=None, + pairs_masked_text=None, pairs_token_labels=None, masked_video=None, video_labels_index=None, + input_caption_ids=None, decoder_mask=None, output_caption_ids=None): + + input_ids = input_ids.view(-1, input_ids.shape[-1]) + token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + video = self.normalize_video(video) + + if input_caption_ids is 
not None: + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + sequence_output, visual_output = self.get_sequence_visual_output(input_ids, token_type_ids, attention_mask, + video, video_mask, shaped=True) + + if self.training: + loss = 0. + if self._stage_one: + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, + video_mask, shaped=True) + sim_loss = self.loss_fct(sim_matrix) + loss += sim_loss + + if self._stage_two: + if self.task_config.do_pretrain: + pairs_masked_text = pairs_masked_text.view(-1, pairs_masked_text.shape[-1]) + pairs_token_labels = pairs_token_labels.view(-1, pairs_token_labels.shape[-1]) + + masked_video = self.normalize_video(masked_video) + video_labels_index = video_labels_index.view(-1, video_labels_index.shape[-1]) + + sequence_output_alm, visual_output_alm = self.get_sequence_visual_output(pairs_masked_text, token_type_ids, + attention_mask, masked_video, video_mask, shaped=True) + + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output_alm, visual_output_alm, attention_mask, video_mask) + sequence_cross_output, visual_cross_output = torch.split(cross_output, [attention_mask.size(-1), video_mask.size(-1)], dim=1) + + alm_loss = self._calculate_mlm_loss(sequence_cross_output, pairs_token_labels) + loss += alm_loss + + nce_loss = self._calculate_mfm_loss(visual_cross_output, video, video_mask, video_labels_index) + loss += nce_loss + + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, video_mask, + shaped=True, _pretrain_joint=True) + sim_loss_joint = self._pretrain_sim_loss_fct(sim_matrix) + loss += sim_loss_joint + + if (input_caption_ids is not None) and \ + (self.task_config.do_pretrain + or (self.task_config.do_pretrain is False and self.task_config.task_type == "caption")): + if self.task_config.do_pretrain: + decoder_scores, res_tuples = self._get_decoder_score(sequence_output_alm, visual_output_alm, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + elif self.task_config.task_type == "caption": + decoder_scores, res_tuples = self._get_decoder_score(sequence_output, visual_output, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + else: + raise NotImplementedError + + output_caption_ids = output_caption_ids.view(-1, output_caption_ids.shape[-1]) + decoder_loss = self.decoder_loss_fct(decoder_scores.view(-1, self.bert_config.vocab_size), output_caption_ids.view(-1)) + loss += decoder_loss + + if self.task_config.do_pretrain or self.task_config.task_type == "retrieval": + if self.task_config.do_pretrain: + sim_matrix_text_visual = self.get_similarity_logits(sequence_output_alm, visual_output_alm, + attention_mask, video_mask, shaped=True) + elif self.task_config.task_type == "retrieval": + sim_matrix_text_visual = self.get_similarity_logits(sequence_output, visual_output, + attention_mask, video_mask, shaped=True) + else: + raise NotImplementedError + + sim_loss_text_visual = self.loss_fct(sim_matrix_text_visual) + loss += sim_loss_text_visual + + return loss + else: + return None + + def _calculate_mlm_loss(self, sequence_output_alm, pairs_token_labels): + alm_scores = self.cls(sequence_output_alm) + alm_loss = self.alm_loss_fct(alm_scores.view(-1, self.bert_config.vocab_size), pairs_token_labels.view(-1)) + return alm_loss + + def _calculate_mfm_loss(self, visual_output_alm, video, 
video_mask, video_labels_index):
+        afm_scores = self.cls_visual(visual_output_alm)
+        afm_scores_tr = afm_scores.view(-1, afm_scores.shape[-1])
+
+        video_tr = video.permute(2, 0, 1)
+        video_tr = video_tr.view(video_tr.shape[0], -1)
+
+        logits_matrix = torch.mm(afm_scores_tr, video_tr)
+        video_mask_float = video_mask.to(dtype=torch.float)
+        mask_matrix = torch.mm(video_mask_float.view(-1, 1), video_mask_float.view(1, -1))
+        masked_logits = logits_matrix + (1. - mask_matrix) * -1e8
+
+        logpt = F.log_softmax(masked_logits, dim=-1)
+        logpt = torch.diag(logpt)
+        nce_loss = -logpt
+
+        video_labels_index_mask = (video_labels_index != self.ignore_video_index)
+        nce_loss = nce_loss.masked_select(video_labels_index_mask.view(-1))
+        nce_loss = nce_loss.mean()
+        return nce_loss
+
+    def get_sequence_visual_output(self, input_ids, token_type_ids, attention_mask, video, video_mask, shaped=False):
+        if shaped is False:
+            input_ids = input_ids.view(-1, input_ids.shape[-1])
+            token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1])
+            attention_mask = attention_mask.view(-1, attention_mask.shape[-1])
+            video_mask = video_mask.view(-1, video_mask.shape[-1])
+            video = self.normalize_video(video)
+        encoded_layers, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True)
+        sequence_output = encoded_layers[-1]
+
+        visual_layers, _ = self.visual(video, video_mask, output_all_encoded_layers=True)
+        visual_output = visual_layers[-1]
+
+        return sequence_output, visual_output
+
+    def _get_cross_output(self, sequence_output, visual_output, attention_mask, video_mask):
+        concat_features = torch.cat((sequence_output, visual_output), dim=1)  # concatenate tokens and frames
+        concat_mask = torch.cat((attention_mask, video_mask), dim=1)
+        text_type_ = torch.zeros_like(attention_mask)
+        video_type_ = torch.ones_like(video_mask)
+        concat_type = torch.cat((text_type_, video_type_), dim=1)
+
+        cross_layers, pooled_output = self.cross(concat_features, concat_type, concat_mask, output_all_encoded_layers=True)
+        cross_output = cross_layers[-1]
+
+        return cross_output, pooled_output, concat_mask
+
+    def _mean_pooling_for_similarity(self, sequence_output, visual_output, attention_mask, video_mask,):
+        attention_mask_un = attention_mask.to(dtype=torch.float).unsqueeze(-1)
+        attention_mask_un[:, 0, :] = 0.
+        sequence_output = sequence_output * attention_mask_un
+        text_out = torch.sum(sequence_output, dim=1) / torch.sum(attention_mask_un, dim=1, dtype=torch.float)
+
+        video_mask_un = video_mask.to(dtype=torch.float).unsqueeze(-1)
+        visual_output = visual_output * video_mask_un
+        video_mask_un_sum = torch.sum(video_mask_un, dim=1, dtype=torch.float)
+        video_mask_un_sum[video_mask_un_sum == 0.] = 1.
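+        # (all-zero mask sums were replaced with 1 above, so this division is safe)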
+ video_out = torch.sum(visual_output, dim=1) / video_mask_un_sum + + return text_out, video_out + + def _cross_similarity(self, sequence_output, visual_output, attention_mask, video_mask): + b_text, s_text, h_text = sequence_output.size() + b_visual, s_visual, h_visual = visual_output.size() + + retrieve_logits_list = [] + step_size = 5 + + split_size = [step_size] * (b_text // step_size) + release_size = b_text - sum(split_size) + if release_size > 0: + split_size += [release_size] + + sequence_output_splits = torch.split(sequence_output, split_size, dim=0) + attention_mask_splits = torch.split(attention_mask, split_size, dim=0) + for i in range(len(split_size)): + sequence_output_row = sequence_output_splits[i] + attention_mask_row = attention_mask_splits[i] + sequence_output_l = sequence_output_row.unsqueeze(1).repeat(1, b_visual, 1, 1) + sequence_output_l = sequence_output_l.view(-1, s_text, h_text) + attention_mask_l = attention_mask_row.unsqueeze(1).repeat(1, b_visual, 1) + attention_mask_l = attention_mask_l.view(-1, s_text) + + step_truth = sequence_output_row.size(0) + visual_output_r = visual_output.unsqueeze(0).repeat(step_truth, 1, 1, 1) + visual_output_r = visual_output_r.view(-1, s_visual, h_visual) + video_mask_r = video_mask.unsqueeze(0).repeat(step_truth, 1, 1) + video_mask_r = video_mask_r.view(-1, s_visual) + + cross_output, pooled_output, concat_mask = \ + self._get_cross_output(sequence_output_l, visual_output_r, attention_mask_l, video_mask_r) + retrieve_logits_row = self.similarity_dense(pooled_output).squeeze(-1).view(step_truth, b_visual) + + retrieve_logits_list.append(retrieve_logits_row) + retrieve_logits = torch.cat(retrieve_logits_list, dim=0) + return retrieve_logits + + def get_similarity_logits(self, sequence_output, visual_output, attention_mask, video_mask, shaped=False, _pretrain_joint=False): + if shaped is False: + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + if (self._stage_two and _pretrain_joint is False) or self.train_sim_after_cross: + retrieve_logits = self._cross_similarity(sequence_output, visual_output, attention_mask, video_mask) + else: + text_out, video_out = self._mean_pooling_for_similarity(sequence_output, visual_output, attention_mask, video_mask) + if self.task_config.use_mil is False: + text_out = F.normalize(text_out, dim=-1) + video_out = F.normalize(video_out, dim=-1) + retrieve_logits = torch.matmul(text_out, video_out.t()) + + return retrieve_logits + + def _get_decoder_score(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, shaped=False): + + if shaped is False: + input_ids = input_ids.view(-1, input_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + res_tuples = () + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output, visual_output, attention_mask, video_mask) + decoder_scores = self.decoder(input_caption_ids, encoder_outs=cross_output, answer_mask=decoder_mask, encoder_mask=concat_mask) + + return decoder_scores, res_tuples + + def decoder_caption(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, + shaped=False, get_logits=False): + if shaped is False: + input_ids 
= input_ids.view(-1, input_ids.shape[-1])
+            attention_mask = attention_mask.view(-1, attention_mask.shape[-1])
+            video_mask = video_mask.view(-1, video_mask.shape[-1])
+
+            input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1])
+            decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1])
+
+        decoder_scores, _ = self._get_decoder_score(sequence_output, visual_output,
+                                                    input_ids, attention_mask, video_mask,
+                                                    input_caption_ids, decoder_mask, shaped=True)
+
+        if get_logits:
+            return decoder_scores
+
+        _, decoder_scores_result = torch.max(decoder_scores, -1)
+
+        return decoder_scores_result
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/modules/module_bert.py b/yc2_univl/backup/pdvc/modules/module_bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa376657fdf271f11978379665a67897c2cc5943
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/module_bert.py
@@ -0,0 +1,447 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import copy
+import json
+import math
+import logging
+import tarfile
+import tempfile
+import shutil
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from .file_utils import cached_path
+from .until_config import PretrainedConfig
+from .until_module import PreTrainedModel, LayerNorm, ACT2FN
+
+logger = logging.getLogger(__name__)
+
+PRETRAINED_MODEL_ARCHIVE_MAP = {
+    'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz",
+    'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz",
+    'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz",
+    'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz",
+    'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz",
+    'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz",
+    'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz",
+}
+
+CONFIG_NAME = 'bert_config.json'
+WEIGHTS_NAME = 'pytorch_model.bin'
+
+
+class BertConfig(PretrainedConfig):
+    """Configuration class to store the configuration of a `BertModel`.
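+    Accepts either a vocabulary size (int) or the path to a JSON config file (str)
+    as its first argument.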
+    """
+    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    config_name = CONFIG_NAME
+    weights_name = WEIGHTS_NAME
+
+    def __init__(self,
+                 vocab_size_or_config_json_file,
+                 hidden_size=768,
+                 num_hidden_layers=12,
+                 num_attention_heads=12,
+                 intermediate_size=3072,
+                 hidden_act="gelu",
+                 hidden_dropout_prob=0.1,
+                 attention_probs_dropout_prob=0.1,
+                 max_position_embeddings=512,
+                 type_vocab_size=2,
+                 initializer_range=0.02):
+        """Constructs BertConfig.
+
+        Args:
+            vocab_size_or_config_json_file: Vocabulary size of `input_ids` in `BertModel`.
+            hidden_size: Size of the encoder layers and the pooler layer.
+            num_hidden_layers: Number of hidden layers in the Transformer encoder.
+            num_attention_heads: Number of attention heads for each attention layer in
+                the Transformer encoder.
+            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
+                layer in the Transformer encoder.
+            hidden_act: The non-linear activation function (function or string) in the
+                encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
+            hidden_dropout_prob: The dropout probability for all fully connected
+                layers in the embeddings, encoder, and pooler.
+            attention_probs_dropout_prob: The dropout ratio for the attention
+                probabilities.
+            max_position_embeddings: The maximum sequence length that this model might
+                ever be used with. Typically set this to something large just in case
+                (e.g., 512 or 1024 or 2048).
+            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+                `BertModel`.
+            initializer_range: The stddev of the truncated_normal_initializer for
+                initializing all weight matrices.
+        """
+        if isinstance(vocab_size_or_config_json_file, str):
+            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
+                json_config = json.loads(reader.read())
+            for key, value in json_config.items():
+                self.__dict__[key] = value
+        elif isinstance(vocab_size_or_config_json_file, int):
+            self.vocab_size = vocab_size_or_config_json_file
+            self.hidden_size = hidden_size
+            self.num_hidden_layers = num_hidden_layers
+            self.num_attention_heads = num_attention_heads
+            self.hidden_act = hidden_act
+            self.intermediate_size = intermediate_size
+            self.hidden_dropout_prob = hidden_dropout_prob
+            self.attention_probs_dropout_prob = attention_probs_dropout_prob
+            self.max_position_embeddings = max_position_embeddings
+            self.type_vocab_size = type_vocab_size
+            self.initializer_range = initializer_range
+        else:
+            raise ValueError("First argument must be either a vocabulary size (int)"
+                             " or the path to a pretrained model config file (str)")
+
+class BertEmbeddings(nn.Module):
+    """Construct the embeddings from word, position and token_type embeddings.
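+    The three embeddings are summed, then LayerNorm and dropout are applied.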
+    """
+    def __init__(self, config):
+        super(BertEmbeddings, self).__init__()
+        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
+        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
+        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
+
+        # self.LayerNorm is not snake-cased to stick with the TensorFlow model variable name and be able to load
+        # any TensorFlow checkpoint file
+        self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+
+    def forward(self, input_ids, token_type_ids=None):
+        seq_length = input_ids.size(1)
+        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
+        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
+        if token_type_ids is None:
+            token_type_ids = torch.zeros_like(input_ids)
+
+        words_embeddings = self.word_embeddings(input_ids)
+        position_embeddings = self.position_embeddings(position_ids)
+        token_type_embeddings = self.token_type_embeddings(token_type_ids)
+
+        embeddings = words_embeddings + position_embeddings + token_type_embeddings
+        embeddings = self.LayerNorm(embeddings)
+        embeddings = self.dropout(embeddings)
+        return embeddings
+
+
+class BertSelfAttention(nn.Module):
+    def __init__(self, config):
+        super(BertSelfAttention, self).__init__()
+        if config.hidden_size % config.num_attention_heads != 0:
+            raise ValueError(
+                "The hidden size (%d) is not a multiple of the number of attention "
+                "heads (%d)" % (config.hidden_size, config.num_attention_heads))
+        self.num_attention_heads = config.num_attention_heads
+        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+        self.query = nn.Linear(config.hidden_size, self.all_head_size)
+        self.key = nn.Linear(config.hidden_size, self.all_head_size)
+        self.value = nn.Linear(config.hidden_size, self.all_head_size)
+
+        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+
+    def transpose_for_scores(self, x):
+        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
+        x = x.view(*new_x_shape)
+        return x.permute(0, 2, 1, 3)
+
+    def forward(self, hidden_states, attention_mask):
+        mixed_query_layer = self.query(hidden_states)
+        mixed_key_layer = self.key(hidden_states)
+        mixed_value_layer = self.value(hidden_states)
+
+        query_layer = self.transpose_for_scores(mixed_query_layer)
+        key_layer = self.transpose_for_scores(mixed_key_layer)
+        value_layer = self.transpose_for_scores(mixed_value_layer)
+
+        # Take the dot product between "query" and "key" to get the raw attention scores.
+        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
+        # Apply the attention mask (precomputed for all layers in the BertModel forward() function).
+        attention_scores = attention_scores + attention_mask
+
+        # Normalize the attention scores to probabilities.
+        attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+        # This is actually dropping out entire tokens to attend to, which might
+        # seem a bit unusual, but is taken from the original Transformer paper.
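+        # (here dropout can zero out an entire key position's weight for a given
+        # query, rather than dropping individual feature dimensions)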
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to
+            a `sentence B` token (see BERT paper for more details).
+        `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices
+            selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
+            input sequence length in the current batch. It's the mask that we typically use for attention when
+            a batch has varying length sentences.
+        `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`.
+
+    Outputs: Tuple of (encoded_layers, pooled_output)
+        `encoded_layers`: controlled by the `output_all_encoded_layers` argument:
+            - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end
+                of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each
+                encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size],
+            - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding
+                to the last attention block of shape [batch_size, sequence_length, hidden_size],
+        `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
+            classifier pretrained on top of the hidden state associated with the first token of the
+            input (`CLS`) to train on the Next-Sentence task (see the BERT paper).
+
+    Example usage:
+    ```python
+    # Already been converted into WordPiece token ids
+    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
+    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
+    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
+
+    config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
+        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)
+
+    model = modeling.BertModel(config=config)
+    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
+    ```
+    """
+    def __init__(self, config):
+        super(BertModel, self).__init__(config)
+        self.embeddings = BertEmbeddings(config)
+        self.encoder = BertEncoder(config)
+        self.pooler = BertPooler(config)
+        self.apply(self.init_weights)
+
+    def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True):
+
+        if attention_mask is None:
+            attention_mask = torch.ones_like(input_ids)
+        if token_type_ids is None:
+            token_type_ids = torch.zeros_like(input_ids)
+
+        # We create a 3D attention mask from a 2D tensor mask.
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
+        # this attention mask is more simple than the triangular masking of causal attention
+        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
+        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
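+        # A tiny worked example (hypothetical values): attention_mask = [[1, 1, 0]]
+        # becomes an extended mask of shape [1, 1, 1, 3] holding [[[[0., 0., -10000.]]]],
+        # so after the softmax the padded third position gets essentially zero weight.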
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/modules/module_cross.py b/yc2_univl/backup/pdvc/modules/module_cross.py new file mode 100644 index 0000000000000000000000000000000000000000..8ff41910a2c62e1c79ab3f843bef3c54171bb026 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/module_cross.py @@ -0,0 +1,394 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'cross_config.json' +WEIGHTS_NAME = 'cross_pytorch_model.bin' + + +class CrossConfig(PretrainedConfig): + """Configuration class to store the configuration of a `CrossModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs CrossConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CrossModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+        attention_probs_dropout_prob: The dropout ratio for the attention
+            probabilities.
+        max_position_embeddings: The maximum sequence length that this model might
+            ever be used with. Typically set this to something large just in case
+            (e.g., 512 or 1024 or 2048).
+        type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+            `CrossModel`.
+        initializer_range: The stddev of the truncated_normal_initializer for
+            initializing all weight matrices.
+        """
+        if isinstance(vocab_size_or_config_json_file, str):
+            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
+                json_config = json.loads(reader.read())
+            for key, value in json_config.items():
+                self.__dict__[key] = value
+        elif isinstance(vocab_size_or_config_json_file, int):
+            self.vocab_size = vocab_size_or_config_json_file
+            self.hidden_size = hidden_size
+            self.num_hidden_layers = num_hidden_layers
+            self.num_attention_heads = num_attention_heads
+            self.hidden_act = hidden_act
+            self.intermediate_size = intermediate_size
+            self.hidden_dropout_prob = hidden_dropout_prob
+            self.attention_probs_dropout_prob = attention_probs_dropout_prob
+            self.max_position_embeddings = max_position_embeddings
+            self.type_vocab_size = type_vocab_size
+            self.initializer_range = initializer_range
+        else:
+            raise ValueError("First argument must be either a vocabulary size (int) "
+                             "or the path to a pretrained model config file (str)")
+
+
+class CrossEmbeddings(nn.Module):
+    """Construct the embeddings from the concatenated input, position and token_type embeddings.
+    """
+    def __init__(self, config):
+        super(CrossEmbeddings, self).__init__()
+
+        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
+        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
+
+        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
+        # any TensorFlow checkpoint file
+        self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+
+    def forward(self, concat_embeddings, concat_type=None):
+
+        batch_size, seq_length = concat_embeddings.size(0), concat_embeddings.size(1)
+        if concat_type is None:
+            # Default to a single token type; torch.zeros needs the sequence length here
+            # (the original passed `concat_type`, i.e. None, as the size argument) and the
+            # ids must be long-typed for the embedding lookup.
+            concat_type = torch.zeros(batch_size, seq_length, dtype=torch.long, device=concat_embeddings.device)
+
+        position_ids = torch.arange(seq_length, dtype=torch.long, device=concat_embeddings.device)
+        position_ids = position_ids.unsqueeze(0).expand(concat_embeddings.size(0), -1)
+
+        token_type_embeddings = self.token_type_embeddings(concat_type)
+        position_embeddings = self.position_embeddings(position_ids)
+
+        embeddings = concat_embeddings + position_embeddings + token_type_embeddings
+        embeddings = self.LayerNorm(embeddings)
+        embeddings = self.dropout(embeddings)
+        return embeddings
+
+class CrossSelfAttention(nn.Module):
+    def __init__(self, config):
+        super(CrossSelfAttention, self).__init__()
+        if config.hidden_size % config.num_attention_heads != 0:
+            raise ValueError(
+                "The hidden size (%d) is not a multiple of the number of attention "
+                "heads (%d)" % (config.hidden_size, config.num_attention_heads))
+        self.num_attention_heads = config.num_attention_heads
+        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+        self.query = nn.Linear(config.hidden_size, self.all_head_size)
+        self.key = nn.Linear(config.hidden_size, self.all_head_size)
+        self.value = nn.Linear(config.hidden_size, self.all_head_size)
+
+        self.dropout
= nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in CrossModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class CrossSelfOutput(nn.Module): + def __init__(self, config): + super(CrossSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossAttention(nn.Module): + def __init__(self, config): + super(CrossAttention, self).__init__() + self.self = CrossSelfAttention(config) + self.output = CrossSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class CrossIntermediate(nn.Module): + def __init__(self, config): + super(CrossIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class CrossOutput(nn.Module): + def __init__(self, config): + super(CrossOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossLayer(nn.Module): + def __init__(self, config): 
+ super(CrossLayer, self).__init__() + self.attention = CrossAttention(config) + self.intermediate = CrossIntermediate(config) + self.output = CrossOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class CrossEncoder(nn.Module): + def __init__(self, config): + super(CrossEncoder, self).__init__() + layer = CrossLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class CrossPooler(nn.Module): + def __init__(self, config): + super(CrossPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class CrossPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(CrossPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class CrossLMPredictionHead(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossLMPredictionHead, self).__init__() + self.transform = CrossPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
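+        # For reference: the Linear below maps [hidden -> vocab] and shares its weight
+        # tensor with the embedding matrix of shape [vocab, hidden], so the logits are
+        # effectively hidden_states @ embedding_weights.T + bias (tied input/output embeddings).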
+        self.decoder = nn.Linear(cross_model_embedding_weights.size(1),
+                                 cross_model_embedding_weights.size(0),
+                                 bias=False)
+        self.decoder.weight = cross_model_embedding_weights
+        self.bias = nn.Parameter(torch.zeros(cross_model_embedding_weights.size(0)))
+
+    def forward(self, hidden_states):
+        hidden_states = self.transform(hidden_states)
+        hidden_states = self.decoder(hidden_states) + self.bias
+        return hidden_states
+
+
+class CrossOnlyMLMHead(nn.Module):
+    def __init__(self, config, cross_model_embedding_weights):
+        super(CrossOnlyMLMHead, self).__init__()
+        self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights)
+
+    def forward(self, sequence_output):
+        prediction_scores = self.predictions(sequence_output)
+        return prediction_scores
+
+
+class CrossOnlyNSPHead(nn.Module):
+    def __init__(self, config):
+        super(CrossOnlyNSPHead, self).__init__()
+        self.seq_relationship = nn.Linear(config.hidden_size, 2)
+
+    def forward(self, pooled_output):
+        seq_relationship_score = self.seq_relationship(pooled_output)
+        return seq_relationship_score
+
+
+class CrossPreTrainingHeads(nn.Module):
+    def __init__(self, config, cross_model_embedding_weights):
+        super(CrossPreTrainingHeads, self).__init__()
+        self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights)
+        self.seq_relationship = nn.Linear(config.hidden_size, 2)
+
+    def forward(self, sequence_output, pooled_output):
+        prediction_scores = self.predictions(sequence_output)
+        seq_relationship_score = self.seq_relationship(pooled_output)
+        return prediction_scores, seq_relationship_score
+
+
+class CrossModel(PreTrainedModel):
+    def __init__(self, config):
+        super(CrossModel, self).__init__(config)
+        self.embeddings = CrossEmbeddings(config)
+        self.encoder = CrossEncoder(config)
+        self.pooler = CrossPooler(config)
+        self.apply(self.init_weights)
+
+    def forward(self, concat_input, concat_type=None, attention_mask=None, output_all_encoded_layers=True):
+
+        if attention_mask is None:
+            # Build the default mask on the input's device; a bare torch.ones would
+            # allocate on CPU and fail for CUDA inputs.
+            attention_mask = torch.ones(concat_input.size(0), concat_input.size(1), device=concat_input.device)
+        if concat_type is None:
+            # token_type ids feed an nn.Embedding, so they must be integer-typed.
+            concat_type = torch.zeros_like(attention_mask, dtype=torch.long)
+
+        # We create a 3D attention mask from a 2D tensor mask.
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
+        # this attention mask is more simple than the triangular masking of causal attention
+        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
+        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
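+        # Broadcast sketch (restating the shapes above): the [batch, 1, 1, key_len] mask
+        # is added to attention scores of shape [batch, num_heads, query_len, key_len],
+        # so the same key positions are masked for every head and every query row.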
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(concat_input, concat_type) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output diff --git a/yc2_univl/backup/pdvc/modules/module_decoder.py b/yc2_univl/backup/pdvc/modules/module_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..25622d1e4c0e9a0d19fe2b4986f7267ba1526823 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/module_decoder.py @@ -0,0 +1,406 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil +import numpy as np + +import torch +from torch import nn +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'decoder_config.json' +WEIGHTS_NAME = 'decoder_pytorch_model.bin' + + +class DecoderConfig(PretrainedConfig): + """Configuration class to store the configuration of a `DecoderModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + type_vocab_size=2, + initializer_range=0.02, + max_target_embeddings=128, + num_decoder_layers=1): + """Constructs DecoderConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `DecoderModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `DecoderModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + max_target_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + num_decoder_layers: + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.max_target_embeddings = max_target_embeddings + self.num_decoder_layers = num_decoder_layers + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + 
return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(decoder_model_embedding_weights.size(1), + decoder_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = decoder_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(decoder_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, decoder_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + +class MultiHeadAttention(nn.Module): + ''' Multi-Head Attention module ''' + + def __init__(self, config): + super(MultiHeadAttention, self).__init__() + + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, q, k, v, attention_mask): + mixed_query_layer = self.query(q) + mixed_key_layer = self.key(k) + mixed_value_layer = self.value(v) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
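+        # Note: unlike the self-attention modules elsewhere in these files, this head
+        # also returns the masked, pre-softmax attention_scores (second return value
+        # below), which DecoderAttention exposes as per-layer attention weights.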
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + + return context_layer, attention_scores + +class PositionwiseFeedForward(nn.Module): + ''' A two-feed-forward-layer module ''' + + def __init__(self, d_in, d_hid, dropout=0.1): + super().__init__() + self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise + self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise + self.layer_norm = nn.LayerNorm(d_in) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + residual = x + output = x.transpose(1, 2) + output = self.w_2(ACT2FN["gelu"](self.w_1(output))) + output = output.transpose(1, 2) + output = self.dropout(output) + output = self.layer_norm(output + residual) + return output + +class DecoderAttention(nn.Module): + def __init__(self, config): + super(DecoderAttention, self).__init__() + self.att = MultiHeadAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, q, k, v, attention_mask): + att_output, attention_probs = self.att(q, k, v, attention_mask) + attention_output = self.output(att_output, q) + return attention_output, attention_probs + +class DecoderLayer(nn.Module): + def __init__(self, config): + super(DecoderLayer, self).__init__() + self.slf_attn = DecoderAttention(config) + self.enc_attn = DecoderAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None): + slf_output, _ = self.slf_attn(dec_input, dec_input, dec_input, slf_attn_mask) + dec_output, dec_att_scores = self.enc_attn(slf_output, enc_output, enc_output, dec_enc_attn_mask) + intermediate_output = self.intermediate(dec_output) + dec_output = self.output(intermediate_output, dec_output) + return dec_output, dec_att_scores + +class DecoderEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_target_embeddings, config.hidden_size) + self.word_embeddings.weight = decoder_word_embeddings_weight + self.position_embeddings.weight = decoder_position_embeddings_weight + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + + embeddings = words_embeddings + position_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class Decoder(nn.Module): + def __init__(self, config): + super(Decoder, self).__init__() + layer = DecoderLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_decoder_layers)]) + + def forward(self, hidden_states, encoder_outs, self_attn_mask, attention_mask, output_all_encoded_layers=False): + dec_att_scores = None + all_encoder_layers = [] + all_dec_att_probs = [] + for layer_module in self.layer: + hidden_states, dec_att_scores = layer_module(hidden_states, encoder_outs, self_attn_mask, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + return all_encoder_layers, all_dec_att_probs + +class DecoderClassifier(nn.Module): + def __init__(self, config, embedding_weights): + super(DecoderClassifier, self).__init__() + self.cls = BertOnlyMLMHead(config, embedding_weights) + + def forward(self, hidden_states): + cls_scores = self.cls(hidden_states) + return cls_scores + +class DecoderModel(PreTrainedModel): + + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + final_norm (bool, optional): apply layer norm to the output of the + final decoder layer (default: True). 
+ """ + + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderModel, self).__init__(config) + self.config = config + self.max_target_length = config.max_target_embeddings + self.embeddings = DecoderEmbeddings(config, decoder_word_embeddings_weight, decoder_position_embeddings_weight) + self.decoder = Decoder(config) + self.classifier = DecoderClassifier(config, decoder_word_embeddings_weight) + self.apply(self.init_weights) + + def forward(self, input_ids, encoder_outs=None, answer_mask=None, encoder_mask=None): + """ + Args: + input_ids (LongTensor): previous decoder outputs of shape `(batch, tgt_len)`, for input feeding/teacher forcing + encoder_outs (Tensor, optional): output from the encoder, used for encoder-side attention + + Returns: + tuple: + - the last decoder layer's output of shape `(batch, tgt_len, vocab)` + - the last decoder layer's attention weights of shape `(batch, tgt_len, src_len)` + """ + embedding_output = self.embeddings(input_ids) + + extended_encoder_mask = encoder_mask.unsqueeze(1).unsqueeze(2) # b x 1 x 1 x ls + extended_encoder_mask = extended_encoder_mask.to(dtype=self.dtype) # fp16 compatibility + extended_encoder_mask = (1.0 - extended_encoder_mask) * -10000.0 + + extended_answer_mask = answer_mask.unsqueeze(1).unsqueeze(2) + extended_answer_mask = extended_answer_mask.to(dtype=self.dtype) # fp16 compatibility + + sz_b, len_s, _ = embedding_output.size() + subsequent_mask = torch.triu(torch.ones((len_s, len_s), device=embedding_output.device, dtype=embedding_output.dtype), diagonal=1) + self_attn_mask = subsequent_mask.unsqueeze(0).expand(sz_b, -1, -1).unsqueeze(1) # b x 1 x ls x ls + slf_attn_mask = ((1.0 - extended_answer_mask) + self_attn_mask).gt(0).to(dtype=self.dtype) + self_attn_mask = slf_attn_mask * -10000.0 + + decoded_layers, dec_att_scores = self.decoder(embedding_output, + encoder_outs, + self_attn_mask, + extended_encoder_mask, + ) + sequence_output = decoded_layers[-1] + cls_scores = self.classifier(sequence_output) + + return cls_scores diff --git a/yc2_univl/backup/pdvc/modules/module_visual.py b/yc2_univl/backup/pdvc/modules/module_visual.py new file mode 100644 index 0000000000000000000000000000000000000000..b9a43f8a74c1e5e020c8b4daec33d7adb5d3b840 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/module_visual.py @@ -0,0 +1,425 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'visual_config.json' +WEIGHTS_NAME = 'visual_pytorch_model.bin' + + +class VisualConfig(PretrainedConfig): + """Configuration class to store the configuration of a `VisualModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file=4096, + hidden_size=768, + num_hidden_layers=3, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + initializer_range=0.02): + """Constructs VisualConfig. + + Args: + vocab_size_or_config_json_file: Size of the encoder layers and the pooler layer. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class VisualEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(VisualEmbeddings, self).__init__() + + self.word_embeddings = nn.Linear(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_embeddings): + seq_length = input_embeddings.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_embeddings.device) + position_ids = position_ids.unsqueeze(0).expand(input_embeddings.size(0), -1) + + words_embeddings = self.word_embeddings(input_embeddings) + # words_embeddings = self.transform_act_fn(words_embeddings) + + position_embeddings = self.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class VisualSelfAttention(nn.Module): + def __init__(self, config): + super(VisualSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in VisualModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class VisualSelfOutput(nn.Module): + def __init__(self, config): + super(VisualSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualAttention(nn.Module): + def __init__(self, config): + super(VisualAttention, self).__init__() + self.self = VisualSelfAttention(config) + self.output = VisualSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class VisualIntermediate(nn.Module): + def __init__(self, config): + super(VisualIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class VisualOutput(nn.Module): + def __init__(self, config): + super(VisualOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualLayer(nn.Module): + def __init__(self, config): + super(VisualLayer, self).__init__() + self.attention = VisualAttention(config) + self.intermediate = VisualIntermediate(config) + self.output = VisualOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class VisualEncoder(nn.Module): + def __init__(self, config): + super(VisualEncoder, self).__init__() + layer = VisualLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class VisualPooler(nn.Module): + def __init__(self, config): + super(VisualPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + 
self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class VisualPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(VisualPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class VisualLMPredictionHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualLMPredictionHead, self).__init__() + self.transform = VisualPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.weight = visual_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(visual_model_embedding_weights.size(1))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = hidden_states.matmul(self.weight) + self.bias + return hidden_states + + +class VisualOnlyMLMHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualOnlyMLMHead, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class VisualOnlyNSPHead(nn.Module): + def __init__(self, config): + super(VisualOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class VisualPreTrainingHeads(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualPreTrainingHeads, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class VisualModel(PreTrainedModel): + """Visual model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a VisualConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to
+            a `sentence B` token (see the BERT paper for more details).
+        `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices
+            selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
+            input sequence length in the current batch. It's the mask that we typically use for attention when
+            a batch has varying length sentences.
+        `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`.
+
+    Outputs: Tuple of (encoded_layers, pooled_output)
+        `encoded_layers`: controlled by the `output_all_encoded_layers` argument:
+            - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end
+                of each attention block (i.e. one per layer, `num_hidden_layers` in total), each
+                encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size],
+            - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding
+                to the last attention block of shape [batch_size, sequence_length, hidden_size],
+        `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
+            classifier pretrained on top of the hidden state associated with the first token of the
+            input (`CLS`) to train on the Next-Sentence task (see the BERT paper).
+
+    Example usage:
+    ```python
+    # A batch of pre-extracted visual features and the corresponding mask
+    video = torch.randn(2, 3, 4096)
+    video_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
+
+    config = modeling.VisualConfig(vocab_size_or_config_json_file=4096, hidden_size=768,
+        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)
+
+    model = modeling.VisualModel(config=config)
+    all_encoder_layers, pooled_output = model(video, video_mask)
+    ```
+    """
+    def __init__(self, config):
+        super(VisualModel, self).__init__(config)
+        self.embeddings = VisualEmbeddings(config)
+        self.encoder = VisualEncoder(config)
+        self.pooler = VisualPooler(config)
+        self.apply(self.init_weights)
+
+    def forward(self, video, attention_mask=None, output_all_encoded_layers=True):
+
+        if attention_mask is None:
+            # Build the default mask on the video tensor's device so GPU runs don't mix devices.
+            attention_mask = torch.ones(video.size(0), video.size(1), device=video.device)
+
+        # We create a 3D attention mask from a 2D tensor mask.
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
+        # this attention mask is more simple than the triangular masking of causal attention
+        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
+        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
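+        # Why -10000.0 rather than -inf: a large finite value keeps fp16 arithmetic free
+        # of inf/NaN while still driving the masked positions' softmax weight to ~0.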
+        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)  # fp16 compatibility
+        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+
+        embedding_output = self.embeddings(video)
+        encoded_layers = self.encoder(embedding_output,
+                                      extended_attention_mask,
+                                      output_all_encoded_layers=output_all_encoded_layers)
+        sequence_output = encoded_layers[-1]
+        pooled_output = self.pooler(sequence_output)
+        if not output_all_encoded_layers:
+            encoded_layers = encoded_layers[-1]
+        return encoded_layers, pooled_output
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/modules/optimization.py b/yc2_univl/backup/pdvc/modules/optimization.py
new file mode 100644
index 0000000000000000000000000000000000000000..264c57c7d8f213004b4ee82a8861e0ae6103c906
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/optimization.py
@@ -0,0 +1,168 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch optimization for BERT model."""
+
+import math
+import torch
+from torch.optim import Optimizer
+from torch.optim.optimizer import required
+from torch.nn.utils import clip_grad_norm_
+import logging
+
+logger = logging.getLogger(__name__)
+
+def warmup_cosine(x, warmup=0.002):
+    if x < warmup:
+        return x/warmup
+    return 0.5 * (1.0 + math.cos(math.pi * x))
+
+def warmup_constant(x, warmup=0.002):
+    """ Linearly increases learning rate over `warmup`*`t_total` (as provided to BertAdam) training steps.
+        Learning rate is 1. afterwards. """
+    if x < warmup:
+        return x/warmup
+    return 1.0
+
+def warmup_linear(x, warmup=0.002):
+    """ Specifies a triangular learning rate schedule where peak is reached at `warmup`*`t_total`-th (as provided to BertAdam) training step.
+        After `t_total`-th training step, learning rate is zero. """
+    if x < warmup:
+        return x/warmup
+    return max((x-1.)/(warmup-1.), 0)
+
+SCHEDULES = {
+    'warmup_cosine': warmup_cosine,
+    'warmup_constant': warmup_constant,
+    'warmup_linear': warmup_linear,
+}
+
+
+class BertAdam(Optimizer):
+    """Implements BERT version of Adam algorithm with weight decay fix.
+    Params:
+        lr: learning rate
+        warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
+        t_total: total number of training steps for the learning
+            rate schedule, -1 means constant learning rate. Default: -1
+        schedule: schedule to use for the warmup (see above). Default: 'warmup_linear'
+        b1: Adam's b1. Default: 0.9
+        b2: Adam's b2. Default: 0.999
+        e: Adam's epsilon. Default: 1e-6
+        weight_decay: Weight decay. Default: 0.01
+        max_grad_norm: Maximum norm for the gradients (-1 means no clipping).
Default: 1.0 + """ + def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear', + b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, + max_grad_norm=1.0): + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr)) + if schedule not in SCHEDULES: + raise ValueError("Invalid schedule parameter: {}".format(schedule)) + if not 0.0 <= warmup < 1.0 and not warmup == -1: + raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup)) + if not 0.0 <= b1 < 1.0: + raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1)) + if not 0.0 <= b2 < 1.0: + raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2)) + if not e >= 0.0: + raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e)) + defaults = dict(lr=lr, schedule=schedule, warmup=warmup, t_total=t_total, + b1=b1, b2=b2, e=e, weight_decay=weight_decay, + max_grad_norm=max_grad_norm) + super(BertAdam, self).__init__(params, defaults) + + def get_lr(self): + lr = [] + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + state = self.state[p] + if len(state) == 0: + return [0] + if group['t_total'] != -1: + schedule_fct = SCHEDULES[group['schedule']] + lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup']) + else: + lr_scheduled = group['lr'] + lr.append(lr_scheduled) + return lr + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['next_m'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['next_v'] = torch.zeros_like(p.data) + + next_m, next_v = state['next_m'], state['next_v'] + beta1, beta2 = group['b1'], group['b2'] + + # Add grad clipping + if group['max_grad_norm'] > 0: + clip_grad_norm_(p, group['max_grad_norm']) + + # Decay the first and second moment running average coefficient + # In-place operations to update the averages at the same time + # next_m.mul_(beta1).add_(1 - beta1, grad) --> pytorch 1.7 + next_m.mul_(beta1).add_(grad, alpha=1 - beta1) + # next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad) --> pytorch 1.7 + next_v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + update = next_m / (next_v.sqrt() + group['e']) + + # Just adding the square of the weights to the loss function is *not* + # the correct way of using L2 regularization/weight decay with Adam, + # since that will interact with the m and v parameters in strange ways. + # + # Instead we want to decay the weights in a manner that doesn't interact + # with the m/v parameters. This is equivalent to adding the square + # of the weights to the loss with plain (non-momentum) SGD. 
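+                # In symbols: update = m_t / (sqrt(v_t) + eps) + wd * theta, and below
+                # theta <- theta - lr_t * update. Note that, unlike standard Adam, BertAdam
+                # applies no bias correction to m_t and v_t.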
+                if group['weight_decay'] > 0.0:
+                    update += group['weight_decay'] * p.data
+
+                if group['t_total'] != -1:
+                    schedule_fct = SCHEDULES[group['schedule']]
+                    progress = state['step']/group['t_total']
+                    lr_scheduled = group['lr'] * schedule_fct(progress, group['warmup'])
+                else:
+                    lr_scheduled = group['lr']
+
+                update_with_lr = lr_scheduled * update
+                p.data.add_(-update_with_lr)
+
+                state['step'] += 1
+
+        return loss
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/modules/tokenization.py b/yc2_univl/backup/pdvc/modules/tokenization.py
new file mode 100644
index 0000000000000000000000000000000000000000..183c81000f82aae59295f8d8572b6bcf67891790
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/tokenization.py
@@ -0,0 +1,408 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import unicodedata
+import os
+import sys
+import logging
+
+from .file_utils import cached_path
+
+logger = logging.getLogger(__name__)
+PRETRAINED_VOCAB_ARCHIVE_MAP = {
+    'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
+    'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
+    'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
+    'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
+    'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
+    'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
+    'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
+}
+PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = {
+    'bert-base-uncased': 512,
+    'bert-large-uncased': 512,
+    'bert-base-cased': 512,
+    'bert-large-cased': 512,
+    'bert-base-multilingual-uncased': 512,
+    'bert-base-multilingual-cased': 512,
+    'bert-base-chinese': 512,
+}
+VOCAB_NAME = 'vocab.txt'
+
+
+def load_vocab(vocab_file):
+    """Loads a vocabulary file into a dictionary."""
+    vocab = collections.OrderedDict()
+    index = 0
+    with open(vocab_file, "r", encoding="utf-8") as reader:
+        while True:
+            token = reader.readline()
+            if not token:
+                break
+            token = token.strip()
+            vocab[token] = index
+            index += 1
+    return vocab
+
+
+def whitespace_tokenize(text):
+    """Runs basic whitespace cleaning and splitting on a piece of text."""
+    text = text.strip()
+    if not text:
+        return []
+    tokens = text.split()
+    return tokens
+
+
+class BertTokenizer(object):
+    """Runs end-to-end tokenization: punctuation splitting + wordpiece"""
+
+    def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")):
+        if not os.path.isfile(vocab_file):
+            raise ValueError(
+                "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
+                "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
+        self.vocab = load_vocab(vocab_file)
+        self.ids_to_tokens = collections.OrderedDict(
+            [(ids, tok) for tok, ids in self.vocab.items()])
+        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split)
+        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+        self.max_len = max_len if max_len is not None else int(1e12)
+
+    def tokenize(self, text):
+        split_tokens = []
+        for token in self.basic_tokenizer.tokenize(text):
+            for sub_token in self.wordpiece_tokenizer.tokenize(token):
+                split_tokens.append(sub_token)
+        return split_tokens
+
+    def convert_tokens_to_ids(self, tokens):
+        """Converts a sequence of tokens into ids using the vocab."""
+        ids = []
+        for token in tokens:
+            if token not in self.vocab:
+                ids.append(self.vocab["[UNK]"])
+                logger.error("Cannot find token '{}' in vocab. Using [UNK] instead".format(token))
+            else:
+                ids.append(self.vocab[token])
+        if len(ids) > self.max_len:
+            raise ValueError(
+                "Token indices sequence length is longer than the specified maximum "
+                "sequence length for this BERT model ({} > {}). Running this"
+                " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
+            )
+        return ids
+
+    def convert_ids_to_tokens(self, ids):
+        """Converts a sequence of ids into tokens using the vocab."""
+        tokens = []
+        for i in ids:
+            tokens.append(self.ids_to_tokens[i])
+        return tokens
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs):
+        """
+        Instantiate a BertTokenizer from a pre-trained vocabulary file.
+        Download and cache the pre-trained model file if needed.
+        """
+        vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name)
+        if os.path.exists(vocab_file) is False:
+            if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP:
+                vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name]
+            else:
+                vocab_file = pretrained_model_name
+        if os.path.isdir(vocab_file):
+            vocab_file = os.path.join(vocab_file, VOCAB_NAME)
+        # redirect to the cache, if necessary
+        try:
+            resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir)
+        except FileNotFoundError:
+            logger.error(
+                "Model name '{}' was not found. "
+                "We assumed '{}' was a path or url but couldn't find any file "
+                "associated to this path or url.".format(
+                    pretrained_model_name,
+                    vocab_file))
+            return None
+        if resolved_vocab_file == vocab_file:
+            logger.info("loading vocabulary file {}".format(vocab_file))
+        else:
+            logger.info("loading vocabulary file {} from cache at {}".format(
+                vocab_file, resolved_vocab_file))
+        if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
+            # if we're using a pretrained model, ensure the tokenizer won't index sequences longer
+            # than the number of positional embeddings
+            max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name]
+            kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
+            kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")
+
+        # Instantiate tokenizer.
+        tokenizer = cls(resolved_vocab_file, *inputs, **kwargs)
+
+        return tokenizer
+
+    def add_tokens(self, new_tokens, model):
+        """
+        Add a list of new tokens to the tokenizer class. If the new tokens are not in the
+        vocabulary, they are added to it with indices starting from the length of the current vocabulary.
+        Args:
+            new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assigns the index of the ``unk_token`` to them).
+        Returns:
+            Number of tokens added to the vocabulary.
+        Examples::
+            # Let's see how to increase the vocabulary of Bert model and tokenizer
+            tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+            model = BertModel.from_pretrained('bert-base-uncased')
+            num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2'], model)
+            print('We have added', num_added_toks, 'tokens')
+            model.resize_token_embeddings(len(tokenizer))  # Notice: resize_token_embeddings expects to receive the full size of the new vocabulary, i.e. the length of the tokenizer.
+        """
+
+        to_add_tokens = []
+        for token in new_tokens:
+            assert isinstance(token, str)
+            if token in self.vocab:
+                continue
+            to_add_tokens.append(token)
+            # logger.info("Adding %s to the vocabulary", token)
+
+        vocab = collections.OrderedDict()
+        for token in self.vocab.keys():
+            vocab[token] = self.vocab[token]
+        for token in to_add_tokens:
+            vocab[token] = len(vocab)
+        self.vocab = self.wordpiece_tokenizer.vocab = vocab
+        self.ids_to_tokens = collections.OrderedDict(
+            [(ids, tok) for tok, ids in self.vocab.items()])
+
+        model.resize_token_embeddings(new_num_tokens=len(vocab))
+        return len(to_add_tokens)
+
+class BasicTokenizer(object):
+    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+    def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
+        """Constructs a BasicTokenizer.
+
+        Args:
+            do_lower_case: Whether to lower case the input.
+        """
+        self.do_lower_case = do_lower_case
+        self.never_split = never_split
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text."""
+        text = self._clean_text(text)
+        # This was added on November 1st, 2018 for the multilingual and Chinese
+        # models. This is also applied to the English models now, but it doesn't
+        # matter since the English models were not trained on any Chinese data
+        # and generally don't have any Chinese data in them (there are Chinese
+        # characters in the vocabulary because Wikipedia does have some Chinese
+        # words in the English Wikipedia.).
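+        # e.g. "ab你好" is padded to "ab 你 好" so whitespace_tokenize below yields
+        # ['ab', '你', '好'] (illustrative input; each CJK ideograph becomes its own token).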
+ text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. 
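+            Tokens that cannot be fully matched against the vocabulary, or that exceed
+            max_input_chars_per_word, come back as the single unk_token, e.g. ["[UNK]"].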
+ """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/yc2_univl/backup/pdvc/modules/until_config.py b/yc2_univl/backup/pdvc/modules/until_config.py new file mode 100644 index 0000000000000000000000000000000000000000..596c157aa23c82eb33c1fb2e07d9b006a52990e9 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/until_config.py @@ -0,0 +1,126 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import logging +import tarfile +import tempfile +import shutil +import torch +from .file_utils import cached_path + +logger = logging.getLogger(__name__) + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weight doesn't exsits. 
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/modules/until_module.py b/yc2_univl/backup/pdvc/modules/until_module.py new file mode 100644 index 0000000000000000000000000000000000000000..d550638157f8aeb2116a9cce022b2c563fd3491b --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/until_module.py @@ -0,0 +1,251 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import logging +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F +import math +from pdvc.modules.until_config import PretrainedConfig + +logger = logging.getLogger(__name__) + +def gelu(x): + """Implementation of the gelu activation function. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. 
+ """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + if not isinstance(config, PretrainedConfig): + raise ValueError( + "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + "To create a model from a Google pretrained model use " + "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + self.__class__.__name__, self.__class__.__name__ + )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). 
+ """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +################################## +###### LOSS FUNCTION ############# +################################## +class CrossEn(nn.Module): + def __init__(self,): + super(CrossEn, self).__init__() + + def forward(self, sim_matrix): + logpt = F.log_softmax(sim_matrix, dim=-1) + logpt = torch.diag(logpt) + nce_loss = -logpt + sim_loss = nce_loss.mean() + return sim_loss + +class MILNCELoss(nn.Module): + def __init__(self, batch_size=1, n_pair=1,): + super(MILNCELoss, self).__init__() + self.batch_size = batch_size + self.n_pair = n_pair + torch_v = float(".".join(torch.__version__.split(".")[:2])) + self.bool_dtype = torch.bool if torch_v >= 1.3 else torch.uint8 + + def forward(self, sim_matrix): + mm_mask = np.eye(self.batch_size) + mm_mask = np.kron(mm_mask, np.ones((self.n_pair, self.n_pair))) + mm_mask = torch.tensor(mm_mask).float().to(sim_matrix.device) + + from_text_matrix = sim_matrix + mm_mask * -1e12 + from_video_matrix = sim_matrix.transpose(1, 0) + + new_sim_matrix = torch.cat([from_video_matrix, from_text_matrix], dim=-1) + logpt = F.log_softmax(new_sim_matrix, dim=-1) + + mm_mask_logpt = torch.cat([mm_mask, torch.zeros_like(mm_mask)], dim=-1) + masked_logpt = logpt + (torch.ones_like(mm_mask_logpt) - mm_mask_logpt) * -1e12 + + new_logpt = -torch.logsumexp(masked_logpt, dim=-1) + + logpt_choice = torch.zeros_like(new_logpt) + mark_ind = torch.arange(self.batch_size).to(sim_matrix.device) * self.n_pair + (self.n_pair//2) + logpt_choice[mark_ind] = 1 + sim_loss = new_logpt.masked_select(logpt_choice.to(dtype=self.bool_dtype)).mean() + return sim_loss + +class MaxMarginRankingLoss(nn.Module): + def __init__(self, + margin=1.0, + negative_weighting=False, + batch_size=1, + n_pair=1, + hard_negative_rate=0.5, + ): + super(MaxMarginRankingLoss, self).__init__() + self.margin = margin + self.n_pair = n_pair + self.batch_size = batch_size + easy_negative_rate = 1 - hard_negative_rate + self.easy_negative_rate = easy_negative_rate + self.negative_weighting = negative_weighting + if n_pair > 1 and batch_size > 1: + alpha = easy_negative_rate / ((batch_size - 1) * (1 - easy_negative_rate)) + mm_mask = (1 - alpha) * np.eye(self.batch_size) + alpha + mm_mask = np.kron(mm_mask, np.ones((n_pair, n_pair))) + mm_mask = torch.tensor(mm_mask) * (batch_size * (1 - easy_negative_rate)) + self.mm_mask = mm_mask.float() + + def forward(self, x): + d = torch.diag(x) + max_margin = F.relu(self.margin + x - d.view(-1, 1)) + \ + F.relu(self.margin + x - d.view(1, -1)) + if self.negative_weighting and self.n_pair > 1 and self.batch_size > 1: + max_margin = max_margin * self.mm_mask.to(max_margin.device) + return max_margin.mean() \ No newline at end of file diff --git 
a/yc2_univl/backup/pdvc/modules/visual-base/visual_config.json b/yc2_univl/backup/pdvc/modules/visual-base/visual_config.json new file mode 100644 index 0000000000000000000000000000000000000000..324fcb6e7ba63166767adf9afa82324412247a48 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/visual-base/visual_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 1, + "vocab_size": 1024 +} diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..5f86c9097b3b6f4b7f50b9d70f7cd58b2f386871 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO @@ -0,0 +1,6 @@ +Metadata-Version: 2.1 +Name: MultiScaleDeformableAttention +Version: 1.0 +Summary: PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention +Home-page: https://github.com/fundamentalvision/Deformable-DETR +Author: Weijie Su diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc251e74aff93cae99a730109d3f696ef326b210 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt @@ -0,0 +1,13 @@ +setup.py +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +MultiScaleDeformableAttention.egg-info/PKG-INFO +MultiScaleDeformableAttention.egg-info/SOURCES.txt +MultiScaleDeformableAttention.egg-info/dependency_links.txt +MultiScaleDeformableAttention.egg-info/top_level.txt +functions/__init__.py +functions/ms_deform_attn_func.py +modules/__init__.py +modules/ms_deform_attn.py +modules/ms_deform_attn_for_caption.py \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..25d8f7790d14d04a74c6acec779aedb3688ef630 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt @@ -0,0 +1,3 @@ +MultiScaleDeformableAttention +functions +modules diff --git a/yc2_univl/backup/pdvc/ops/__init__.py b/yc2_univl/backup/pdvc/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ed3d8ddca46efead59543bfd2f1961790abdc96 Binary 
files /dev/null and b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c722836c6716e99f5a33542ebc2461e4540b9c0 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..affe1b85a7c92a8c1ecfca0d0b2c329ce77bf383 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5720c8c8f59f4168baf51ec63ba9c5f5e90d5abb998c0fbdd6170547d23a13 +size 7942000 diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # The compiled CUDA extension is optional; without it, only the pure-PyTorch
+    # fallback (ms_deform_attn_core_pytorch) is usable.
+    pass
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2), the first item of the last dim is the x axis (corresponding to w),
+        # and the second is the y axis (corresponding to h).
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # for debug and test only,
+    # need to use cuda version instead
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum H*W, M_: head number, D_: feature dim of each head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of scales, P_: number of sampled key points
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert sampling locations from range [0, 1] to [-1, 1]
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2), the first item of the last dim is the x axis (corresponding to w),
+        # and the second is the y axis (corresponding to h).
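+        # F.grid_sample reads (x, y) grids in [-1, 1]; in this repo's 1D usage H_ is 1 and
+        # the y coordinate is held constant, so the bilinear lookup below is effectively a
+        # 1D interpolation along the temporal axis W_.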
+ # N_*M_, D_, Lq_, P_ + sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, + mode='bilinear', padding_mode='border', align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) + attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) + + if return_value: + return torch.stack(sampling_value_list, dim=-2) + #(N_ * M_, D_, Lq_, L_* P_) * (N_*M_, 1, Lq_, L_*P_) --> (N_*M_, D_, Lq_) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) + return output.transpose(1, 2).contiguous() diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn import MSDeformAttn +from .ms_deform_attn_for_caption import MSDeformAttnCap \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
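+        # The bias init above gives each head the x-component of a distinct unit direction,
+        # scaled by the sampling-point index, so the initial sampling points fan out from
+        # the reference location at different temporal offsets before any training.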
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                             or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1D temporal sampling locations to 2D (with a dummy H = 1 axis) so the
+        # generic 2D deformable-attention kernels can be reused.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
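+        # Compared with MSDeformAttn, this caption variant projects sampling offsets and
+        # attention weights from a 2*d_model query (note the doubled Linear input width
+        # above), and its forward() returns the raw per-point sampled values via
+        # return_value=True instead of their attention-weighted sum.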
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                             or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1D temporal sampling locations to 2D (with a dummy H = 1 axis) so the
+        # generic 2D sampling utilities can be reused.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps
new file mode 100644
index 0000000000000000000000000000000000000000..2bef29d420f02b4282644cba394698912212dab8
Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps differ
diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
new file mode 100644
index 0000000000000000000000000000000000000000..fd78ae63cd064bb569f9279931f2e0668833f50d
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
@@ -0,0 +1,4 @@
+# ninja log v5
+0 2930 1685020146224081877 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o 8f7db54445222f0
+0 10580 1685020153869972218 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o 91f10249ca524b9b
+0 13795
1685020157081510628 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o 3e48c35d2c631cee diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..9d156fb45877ed14f310b8ae1f889c048fe0fa2b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja @@ -0,0 +1,30 @@ +ninja_required_version = 1.3 +cxx = c++ +nvcc = /usr/local/cuda/bin/nvcc + +cflags = -pthread -B /home/liuhuabin/miniconda3/envs/PDVC/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 +cuda_cflags = -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o: compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o: cuda_compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o: compile 
/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp + + + + + diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o new file mode 100644 index 0000000000000000000000000000000000000000..d30f1ff54acc23e3e0f5ea22b3a8828fdd2c44b7 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59afa2abc476414b1faa6816920a93293fc9e71aa96d790c80760a879f5d0682 +size 1437672 diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..d9274a1b895a7c123eab8231e2e24c2ea6629581 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973f1d16162f782172da95253065226cd068f45430bbc1a8920929ffda09947d +size 920176 diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o new file mode 100644 index 0000000000000000000000000000000000000000..e771be34bcbacfa86a2e41f1728b9d0b2fef3a85 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad8100cd431dec4d7ef8dc5d144c90402c71b4b41a772e5f120c38b8fe9aa0e +size 10423896 diff --git a/yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg b/yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg new file mode 100644 index 0000000000000000000000000000000000000000..dc5bbc86e1f4304b490711416d30dbeecec3a2b8 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ad69121c719dc533912a5233ee2ba4d895fd745283dc122601f20b0da2a519 +size 2223428 diff --git a/yc2_univl/backup/pdvc/ops/functions/__init__.py b/yc2_univl/backup/pdvc/ops/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00b83c1e1d8810a77347e3d76609cdf347898186 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09ce00b445b4c8d76b027f013de6cb094dae82dc Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..222160988ac28f5eba55fe2acff1a6b176b3429b Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fc0981ca1144f3eb8a7166b570fb797f8004a16 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/ms_deform_attn_func.py b/yc2_univl/backup/pdvc/ops/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # the compiled CUDA extension is optional; ms_deform_attn_core_pytorch below
+    # serves as the fallback when it has not been built
+    MSDA = None
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2); the first item of the last dim is the x axis (corresponding to w),
+        # the second item is the y axis (corresponding to h).
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # for debug and test only,
+    # need to use cuda version instead
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum_l H_l*W_l, M_: number of heads, D_: feature dim of each head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of scales (levels), P_: number of sampled key points
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert from range [0, 1] to [-1, 1]
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2); the first item of the last dim is the x axis (corresponding to w),
+        # the second item is the y axis (corresponding to h).
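+        # grid_sample expects a 4-D input (N, C, H, W) and a grid in [-1, 1]; in this
+        # repo the forward() callers stack a height of 1 into the spatial shapes, so
+        # the bilinear lookup below degenerates to linear interpolation along the
+        # temporal axis.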
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='border', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+
+    if return_value:
+        return torch.stack(sampling_value_list, dim=-2)
+    # (N_*M_, D_, Lq_, L_*P_) * (N_*M_, 1, Lq_, L_*P_) --> (N_*M_, D_, Lq_)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/yc2_univl/backup/pdvc/ops/make.sh b/yc2_univl/backup/pdvc/ops/make.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a7e4320108ecd2f02d1824505849850b0c69d319
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/make.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+python setup.py build install
diff --git a/yc2_univl/backup/pdvc/ops/modules/__init__.py b/yc2_univl/backup/pdvc/ops/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn import MSDeformAttn +from .ms_deform_attn_for_caption import MSDeformAttnCap \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd29db0d448db6cc3ebfcb499cb6105d2f745555 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc85ead761d81b2d819429824ee2393e9f50a6ae Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1de99e2b9ab1efc42b399837d8cfd7a09a3e2ef1 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12c1ccbe61ed8ca360ce969e012e60a89d05cece Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..671fa7d00552b0d0913bf502750b061574f7b3f2 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a23f676c0714c277a628441a7459d2724f62b61 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn.py b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
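+
+    # Minimal usage sketch (illustrative only, not part of the original source;
+    # shapes follow the forward() docstring below, with n_levels temporal scales):
+    #   attn = MSDeformAttn(d_model=256, n_levels=4, n_heads=8, n_points=4)
+    #   query:            (N, Len_q, 256)
+    #   reference_points: (N, Len_q, 4, 1), normalized to [0, 1]
+    #   input_flatten:    (N, T_0+T_1+T_2+T_3, 256)
+    #   output = attn(query, reference_points, input_flatten,
+    #                 input_spatial_shapes, input_level_start_index)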
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                              or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to 2-D grid coordinates: x carries the
+        # temporal position, y is fixed at 0.5, and every level becomes a (1, T_l) "image".
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
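+
+    # Unlike MSDeformAttn, this caption variant projects offsets and weights from a
+    # query of width 2 * d_model (e.g. a hidden state concatenated with an event
+    # feature -- the exact pairing is up to the caller), and its forward() returns
+    # the raw per-point sampled values (return_value=True) instead of applying
+    # output_proj.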
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                              or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to 2-D grid coordinates: x carries the
+        # temporal position, y is fixed at 0.5, and every level becomes a (1, T_l) "image".
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/setup.py b/yc2_univl/backup/pdvc/ops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0131bc21cf1b45b90fcf174e2c53e4c08e9c641
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/setup.py
@@ -0,0 +1,71 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+import os
+import glob
+
+import torch
+
+from torch.utils.cpp_extension import CUDA_HOME
+from torch.utils.cpp_extension import CppExtension
+from torch.utils.cpp_extension import CUDAExtension
+
+from setuptools import find_packages
+from setuptools import setup
+
+requirements = ["torch", "torchvision"]
+
+def get_extensions():
+    this_dir = os.path.dirname(os.path.abspath(__file__))
+    extensions_dir = os.path.join(this_dir, "src")
+
+    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
+    source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
+    source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
+
+    sources = main_file + source_cpu
+    extension = CppExtension
+    extra_compile_args = {"cxx": []}
+    define_macros = []
+
+    if torch.cuda.is_available() and CUDA_HOME is not None:
+        extension = CUDAExtension
+        sources += source_cuda
+        define_macros += [("WITH_CUDA", None)]
+        extra_compile_args["nvcc"] = [
+            "-DCUDA_HAS_FP16=1",
+            "-D__CUDA_NO_HALF_OPERATORS__",
+            "-D__CUDA_NO_HALF_CONVERSIONS__",
+            "-D__CUDA_NO_HALF2_OPERATORS__",
+        ]
+    else:
+        raise NotImplementedError('CUDA is not available')
+
+    sources = [os.path.join(extensions_dir, s) for s in sources]
+    include_dirs = [extensions_dir]
+    ext_modules = [
+        extension(
+            "MultiScaleDeformableAttention",
+            sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        )
+    ]
+    return ext_modules
+
+setup(
+    name="MultiScaleDeformableAttention",
+    version="1.0",
+    author="Weijie Su",
+    url="https://github.com/fundamentalvision/Deformable-DETR",
+    description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention",
+    packages=find_packages(exclude=("configs", "tests",)),
+    ext_modules=get_extensions(),
+    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
+)
diff --git a/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e1bf854de1f3860d20b6fef5c1a17817c268e70a
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
@@ -0,0 +1,41 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on CPU");
+}
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on CPU");
+}
+
diff --git a/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..81b7b58a3d9502bbb684dc84687a526dedf94cae
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
@@ -0,0 +1,33 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
+
diff --git a/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d6d583647cce987196d5ad1968a8a365a379e774
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
@@ -0,0 +1,153 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+#include "cuda/ms_deform_im2col_cuda.cuh"
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
+
+    const int batch_n = im2col_step_;
+    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto columns = output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
+            ms_deformable_im2col_cuda<scalar_t>(at::cuda::getCurrentCUDAStream(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                columns.data<scalar_t>());
+
+        }));
+    }
+
+    output = output.view({batch, num_query, num_heads*channels});
+
+    return output;
+}
+
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+    AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+    AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto grad_value = at::zeros_like(value);
+    auto grad_sampling_loc = at::zeros_like(sampling_loc);
+    auto grad_attn_weight = at::zeros_like(attn_weight);
+
+    const int batch_n = im2col_step_;
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto grad_output_g = grad_output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] {
+            ms_deformable_col2im_cuda<scalar_t>(at::cuda::getCurrentCUDAStream(),
+                grad_output_g.data<scalar_t>(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                grad_value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                grad_sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                grad_attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size);
+
+        }));
+    }
+
+    return {
+        grad_value, grad_sampling_loc, grad_attn_weight
+    };
+}
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7ae53f99c820ce6193b608ad344550348a0b42c
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
@@ -0,0 +1,30 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
diff --git a/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5635be7822e7cbfb8b5524185f213a9368a91dce
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
@@ -0,0 +1,1328 @@
+/*!
+**************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************
+* Modified from DCN (https://github.com/msracver/Deformable-ConvNets)
+* Copyright (c) 2018 Microsoft
+**************************************************************************
+*/
+
+#include <cstdio>
+#include <algorithm>
+#include <cstring>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+#include <THC/THCAtomics.cuh>
+
+// assign indices to blocks with a uniform (grid-sized) stride
+#define CUDA_KERNEL_LOOP(i, n)                          \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \
+      i < (n);                                          \
+      i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+inline int GET_BLOCKS(const int N, const int num_threads)
+{
+  return (N + num_threads - 1) / num_threads;
+}
+
+
+template <typename scalar_t>
+__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data,
+                                                   const int &height, const int &width, const int &nheads, const int &channels,
+                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+  }
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data,
+                                               const int &height, const int &width, const int &nheads, const int &channels,
+                                               const scalar_t &h, const scalar_t &w, const int &m, const int &c,
+                                               const scalar_t &top_grad,
+                                               const scalar_t &attn_weight,
+                                               scalar_t* &grad_value,
+                                               scalar_t* grad_sampling_loc,
+                                               scalar_t* grad_attn_weight)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+  const scalar_t top_grad_value = top_grad * attn_weight;
+  scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+    grad_h_weight -= hw * v1;
+    grad_w_weight -= hh * v1;
+    atomicAdd(grad_value+ptr1, w1*top_grad_value);
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+    grad_h_weight -= lw * v2;
+    grad_w_weight += hh * v2;
+    atomicAdd(grad_value+ptr2, w2*top_grad_value);
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+    grad_h_weight += hw * v3;
+    grad_w_weight -= lh * v3;
+    atomicAdd(grad_value+ptr3, w3*top_grad_value);
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+    grad_h_weight += lw * v4;
+    grad_w_weight += lh * v4;
+    atomicAdd(grad_value+ptr4, w4*top_grad_value);
+  }
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  *grad_attn_weight = top_grad * val;
+  *grad_sampling_loc = width * grad_w_weight * top_grad_value;
+  *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value;
+}
+
+
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data,
+                                                  const int &height, const int &width, const int &nheads, const int &channels,
+                                                  const scalar_t &h, const scalar_t &w, const int &m, const int &c,
+                                                  const scalar_t &top_grad,
+                                                  const scalar_t &attn_weight,
+                                                  scalar_t* &grad_value,
+                                                  scalar_t* grad_sampling_loc,
+                                                  scalar_t* grad_attn_weight)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
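+  // Precompute flat offsets of the four bilinear corner taps: moving one row (h)
+  // advances width*nheads*channels elements, one column (w) advances nheads*channels.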
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+  const scalar_t top_grad_value = top_grad * attn_weight;
+  scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+    grad_h_weight -= hw * v1;
+    grad_w_weight -= hh * v1;
+    atomicAdd(grad_value+ptr1, w1*top_grad_value);
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+    grad_h_weight -= lw * v2;
+    grad_w_weight += hh * v2;
+    atomicAdd(grad_value+ptr2, w2*top_grad_value);
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+    grad_h_weight += hw * v3;
+    grad_w_weight -= lh * v3;
+    atomicAdd(grad_value+ptr3, w3*top_grad_value);
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+    grad_h_weight += lw * v4;
+    grad_w_weight += lh * v4;
+    atomicAdd(grad_value+ptr4, w4*top_grad_value);
+  }
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  atomicAdd(grad_attn_weight, top_grad * val);
+  atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);
+  atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);
+}
+
+
+template <typename scalar_t>
+__global__ void ms_deformable_im2col_gpu_kernel(const int n,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *data_col)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    scalar_t *data_col_ptr = data_col + index;
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+    scalar_t col = 0;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride);
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col) * weight;
+        }
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+      }
+    }
+    *data_col_ptr = col;
+  }
+}
+
+template <typename scalar_t, unsigned int blockSize>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n,
+                                                const scalar_t *grad_col,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *grad_value,
+                                                scalar_t *grad_sampling_loc,
+                                                scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+    __shared__ scalar_t cache_grad_attn_weight[blockSize];
+    unsigned int tid = threadIdx.x;
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+        *(cache_grad_attn_weight+threadIdx.x)=0;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+        }
+
+        __syncthreads();
+        if (tid == 0)
+        {
+          scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];
+          int sid=2;
+          for (unsigned int tid = 1; tid < blockSize; ++tid)
+          {
+            _grad_w += cache_grad_sampling_loc[sid];
+            _grad_h += cache_grad_sampling_loc[sid + 1];
+            _grad_a += cache_grad_attn_weight[tid];
+            sid += 2;
+          }
+
+          *grad_sampling_loc = _grad_w;
+          *(grad_sampling_loc + 1) = _grad_h;
+          *grad_attn_weight = _grad_a;
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t, unsigned int blockSize>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n,
+                                                const scalar_t *grad_col,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *grad_value,
+                                                scalar_t *grad_sampling_loc,
+                                                scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+    __shared__ scalar_t cache_grad_attn_weight[blockSize];
+    unsigned int tid = threadIdx.x;
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+        *(cache_grad_attn_weight+threadIdx.x)=0;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+        }
+
+        __syncthreads();
+
+        for (unsigned int s=blockSize/2; s>0; s>>=1)
+        {
+          if (tid < s) {
+            const unsigned int xid1 = tid << 1;
+            const unsigned int xid2 = (tid + s) << 1;
+            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+            cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+          }
+          __syncthreads();
+        }
+
+        if (tid == 0)
+        {
+          *grad_sampling_loc = cache_grad_sampling_loc[0];
+          *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];
+          *grad_attn_weight = cache_grad_attn_weight[0];
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n,
+                                                const scalar_t *grad_col,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *grad_value,
+                                                scalar_t *grad_sampling_loc,
+                                                scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    extern __shared__ int _s[];
+    scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;
+    scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+    unsigned int tid = threadIdx.x;
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+        *(cache_grad_attn_weight+threadIdx.x)=0;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+        }
+
+        __syncthreads();
+        if (tid == 0)
+        {
+          scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];
+          int sid=2;
+          for (unsigned int tid = 1; tid < blockDim.x; ++tid)
+          {
+            _grad_w += cache_grad_sampling_loc[sid];
+            _grad_h += cache_grad_sampling_loc[sid + 1];
+            _grad_a += cache_grad_attn_weight[tid];
+            sid += 2;
+          }
+
+          *grad_sampling_loc = _grad_w;
+          *(grad_sampling_loc + 1) = _grad_h;
+          *grad_attn_weight = _grad_a;
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+
} + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); 
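+        // Tree reduction: each pass halves the active thread count s, and the
+        // (s << 1) branch above folds in the odd leftover element whenever the
+        // previous active width spre is odd, so no partial sum is dropped.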
+ } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + 
cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+            if (tid + (s << 1) < spre)
+            {
+              cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)];
+              cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)];
+              cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
+            }
+          }
+          __syncthreads();
+        }
+
+        if (tid == 0)
+        {
+          atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]);
+          atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]);
+          atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]);
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_gm(const int n,
+                                                   const scalar_t *grad_col, const scalar_t *data_value,
+                                                   const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+                                                   const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+                                                   const int batch_size, const int spatial_size, const int num_heads,
+                                                   const int channels, const int num_levels, const int num_query,
+                                                   const int num_point,
+                                                   scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear_gm(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            grad_sampling_loc, grad_attn_weight);
+        }
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+void ms_deformable_im2col_cuda(cudaStream_t stream,
+                               const scalar_t* data_value,
+                               const int64_t* data_spatial_shapes, const int64_t* data_level_start_index,
+                               const scalar_t* data_sampling_loc, const scalar_t* data_attn_weight,
+                               const int batch_size, const int spatial_size, const int num_heads,
+                               const int channels, const int num_levels, const int num_query, const int num_point,
+                               scalar_t* data_col)
+{
+  const int num_kernels = batch_size * num_query * num_heads * channels;
+  const int num_actual_kernels = batch_size * num_query * num_heads * channels;
+  const int num_threads = CUDA_NUM_THREADS;
+  ms_deformable_im2col_gpu_kernel<scalar_t>
+      <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+      num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+      batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col);
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+  }
+
+}
+
+template <typename scalar_t>
+void ms_deformable_col2im_cuda(cudaStream_t stream,
+                               const scalar_t* grad_col, const scalar_t* data_value,
+                               const int64_t * data_spatial_shapes, const int64_t * data_level_start_index,
+                               const scalar_t * data_sampling_loc, const scalar_t * data_attn_weight,
+                               const int batch_size, const int spatial_size, const int num_heads,
+                               const int channels, const int num_levels, const int num_query, const int num_point,
+                               scalar_t* grad_value, scalar_t* grad_sampling_loc, scalar_t* grad_attn_weight)
+{
+  const int num_threads = (channels > CUDA_NUM_THREADS) ? CUDA_NUM_THREADS : channels;
+  const int num_kernels = batch_size * num_query * num_heads * channels;
+  const int num_actual_kernels = batch_size * num_query * num_heads * channels;
+  if (channels > 1024)
+  {
+    if ((channels & 1023) == 0)
+    {
+      ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks<scalar_t>
+          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, num_threads*3*sizeof(scalar_t), stream>>>(
+          num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+          batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+          grad_value, grad_sampling_loc, grad_attn_weight);
+    }
+    else
+    {
+      ms_deformable_col2im_gpu_kernel_gm<scalar_t>
+          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+          num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+          batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+          grad_value, grad_sampling_loc, grad_attn_weight);
+    }
+  }
+  else{
+    switch(channels)
+    {
+      case 1:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 1>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 2:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 2>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 4:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 4>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 8:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 8>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 16:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 16>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 32:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 32>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 64:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 64>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 128:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 128>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 256:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 256>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 512:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 512>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 1024:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 1024>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      default:
+        if (channels < 64)
+        {
+          ms_deformable_col2im_gpu_kernel_shm_reduce_v1<scalar_t>
+              <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, num_threads*3*sizeof(scalar_t), stream>>>(
+              num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+              batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+              grad_value, grad_sampling_loc, grad_attn_weight);
+        }
+        else
+        {
+          ms_deformable_col2im_gpu_kernel_shm_reduce_v2<scalar_t>
+              <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, num_threads*3*sizeof(scalar_t), stream>>>(
+              num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+              batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+              grad_value, grad_sampling_loc, grad_attn_weight);
+        }
+    }
+  }
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+  }
+
+}
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/ops/src/ms_deform_attn.h b/yc2_univl/backup/pdvc/ops/src/ms_deform_attn.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac0ef2ec25f7d0ee51ca2d807b159ddf85652017
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/ms_deform_attn.h
@@ -0,0 +1,62 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+
+#include "cpu/ms_deform_attn_cpu.h"
+
+#ifdef WITH_CUDA
+#include "cuda/ms_deform_attn_cuda.h"
+#endif
+
+
+at::Tensor
+ms_deform_attn_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    if (value.type().is_cuda())
+    {
+#ifdef WITH_CUDA
+        return ms_deform_attn_cuda_forward(
+            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
+#else
+        AT_ERROR("Not compiled with GPU support");
+#endif
+    }
+    AT_ERROR("Not implemented on the CPU");
+}
+
+std::vector<at::Tensor>
+ms_deform_attn_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    if (value.type().is_cuda())
+    {
+#ifdef WITH_CUDA
+        return ms_deform_attn_cuda_backward(
+            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
+#else
+        AT_ERROR("Not compiled with GPU support");
+#endif
+    }
+    AT_ERROR("Not implemented on the CPU");
+}
+
diff --git a/yc2_univl/backup/pdvc/ops/src/vision.cpp b/yc2_univl/backup/pdvc/ops/src/vision.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2201f63a51dca16d0b31148ed2c9e8e47ec15bdc
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/vision.cpp
@@ -0,0 +1,16 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +#include "ms_deform_attn.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); + m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); +} diff --git a/yc2_univl/backup/pdvc/ops/test.py b/yc2_univl/backup/pdvc/ops/test.py new file mode 100644 index 0000000000000000000000000000000000000000..8dbf6d5547d131f01a8c5c28b76557bd27a9334b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/test.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import time +import torch +import torch.nn as nn +from torch.autograd import gradcheck + +from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +N, M, D = 1, 2, 2 +Lq, L, P = 2, 2, 2 +shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() +level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) +S = sum([(H*W).item() for H, W in shapes]) + + +torch.manual_seed(3) + + +@torch.no_grad() +def check_forward_equal_with_pytorch_double(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +@torch.no_grad() +def check_forward_equal_with_pytorch_float(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, 
attention_weights, im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): + + value = torch.rand(N, S, M, channels).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + func = MSDeformAttnFunction.apply + + value.requires_grad = grad_value + sampling_locations.requires_grad = grad_sampling_loc + attention_weights.requires_grad = grad_attn_weight + + gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) + + print(f'* {gradok} check_gradient_numerical(D={channels})') + + +if __name__ == '__main__': + check_forward_equal_with_pytorch_double() + check_forward_equal_with_pytorch_float() + + for channels in [30, 32, 64, 71, 1025, 2048, 3096]: + check_gradient_numerical(channels, True, True, True) + + + diff --git a/yc2_univl/backup/pdvc/pdvc.py b/yc2_univl/backup/pdvc/pdvc.py new file mode 100644 index 0000000000000000000000000000000000000000..4f7ffe3067b2a1382a79c0efc5a8ac828baa9c03 --- /dev/null +++ b/yc2_univl/backup/pdvc/pdvc.py @@ -0,0 +1,1305 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR(https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
+# ------------------------------------------------------------------------
+
+import json
+import torch
+import torch.nn.functional as F
+from torch import nn
+import math
+import time
+
+from misc.detr_utils import box_ops
+from misc.detr_utils.misc import (inverse_sigmoid)
+
+from .matcher import build_matcher
+
+from .deformable_transformer import build_deforamble_transformer
+from pdvc.CaptioningHead import build_captioner
+import copy
+from .criterion import AlignCriterion, SetCriterion, ContrastiveCriterion
+# from .rl_tool import init_scorer
+from misc.utils import decide_two_stage
+from .base_encoder import build_base_encoder
+# from .video_segmentation import segment_video_into_steps, alignment_to_boundary, to_center_duration, align_frame_into_steps
+from .video_segmentation import *
+# from transformers import AutoModel, BertConfig
+# from transformers.models.bert.modeling_bert import BertEncoder
+import numpy as np
+from itertools import chain
+# from .UniVL import load_pretrained_UniVL
+
+
+def _get_clones(module, N):
+    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+class PDVC(nn.Module):
+    """ This is the PDVC module that performs dense video captioning """
+
+    def __init__(self, base_encoder, transformer, captioner, num_classes, num_queries, num_feature_levels,
+                 aux_loss=True, with_box_refine=False, opt=None, translator=None):
+        """ Initializes the model.
+        Parameters:
+            transformer: torch module of the transformer architecture. See transformer.py
+            captioner: captioning head that generates a sentence for each event query
+            num_classes: number of foreground classes
+            num_queries: number of event queries. This is the maximal number of events
+                         PDVC can detect in a single video. For ActivityNet Captions, we recommend 10-30 queries.
+            aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
+ with_box_refine: iterative bounding box refinement + opt: all configs + """ + super().__init__() + self.opt = opt + self.base_encoder = base_encoder + self.transformer = transformer + self.caption_head = captioner + num_pred_text = 0 + + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # self.text_encoder = text_encoder + # text_encoder_hidden_dim = self.text_encoder.config.hidden_size + # num_pred_text += 1 + + hidden_dim = transformer.d_model + text_hidden_dim = opt.text_hidden_dim + + if self.opt.use_anchor: + # self.tgt_embed = nn.Embedding(num_queries, hidden_dim) + self.anchor_embed = nn.Embedding(num_queries, 2) # num_queries, 2 (center, duration) + self.query_embed = self.transformer.prepare_init_anchor_and_query(self.anchor_embed, hidden_dim, \ + random_anchor_init=True, prior_anchor_duration_init=True, \ + prior_duration=0.048) + self.query_embed = nn.Parameter(self.query_embed, requires_grad=True) + else: + self.query_embed = nn.Embedding(num_queries, hidden_dim * 2) + + self.class_head = nn.Linear(hidden_dim, num_classes) + self.class_refine_head = nn.Linear(hidden_dim, num_classes) # For refine pseudo box if use additional score layer + self.count_head = nn.Linear(hidden_dim, opt.max_eseq_length + 1) + self.bbox_head = MLP(hidden_dim, hidden_dim, 2, 3) + + self.num_feature_levels = num_feature_levels + self.aux_loss = aux_loss + self.with_box_refine = with_box_refine + self.share_caption_head = opt.share_caption_head + + # initialization + prior_prob = 0.01 + bias_value = -math.log((1 - prior_prob) / prior_prob) + self.class_head.bias.data = torch.ones(num_classes) * bias_value + self.class_refine_head.bias.data = torch.ones(num_classes) * bias_value + nn.init.constant_(self.bbox_head.layers[-1].weight.data, 0) + nn.init.constant_(self.bbox_head.layers[-1].bias.data, 0) + + if self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim' \ + or self.opt.use_pseudo_box: + self.load_text_embed = True + else: + self.load_text_embed = False + + + num_pred = transformer.decoder.num_layers + if self.share_caption_head: + print('all decoder layers share the same caption head') + self.caption_head = nn.ModuleList([self.caption_head for _ in range(num_pred)]) + else: + print('do NOT share the caption head') + self.caption_head = _get_clones(self.caption_head, num_pred) + + if self.opt.use_additional_cap_layer: + self.caption_head_refine = _get_clones(captioner, self.opt.refine_pseudo_stage_num) + + if with_box_refine: + self.class_head = _get_clones(self.class_head, num_pred) + self.count_head = _get_clones(self.count_head, num_pred) + self.bbox_head = _get_clones(self.bbox_head, num_pred) + nn.init.constant_(self.bbox_head[0].layers[-1].bias.data[1:], -2) + # hack implementation for iterative bounding box refinement + self.transformer.decoder.bbox_head = self.bbox_head + else: + nn.init.constant_(self.bbox_head.layers[-1].bias.data[1:], -2) + self.class_head = nn.ModuleList([self.class_head for _ in range(num_pred)]) + self.count_head = nn.ModuleList([self.count_head for _ in range(num_pred)]) + self.bbox_head = nn.ModuleList([self.bbox_head for _ in range(num_pred)]) + self.transformer.decoder.bbox_head = None + + self.class_refine_head = _get_clones(self.class_refine_head, self.opt.refine_pseudo_stage_num) + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + if opt.disable_contrastive_projection: + projection_event = nn.Identity() + projection_text = nn.Identity() + else: + projection_event = nn.Linear(hidden_dim, opt.contrastive_hidden_size) + projection_text = 
nn.Linear(text_hidden_dim, opt.contrastive_hidden_size) + self.contrastive_projection_event = nn.ModuleList( + [projection_event for _ in range(num_pred)]) + self.contrastive_projection_text = nn.ModuleList( + [projection_text for _ in range(num_pred)]) + if opt.enable_bg_for_cl: + self.background_embed = nn.Parameter(torch.randn(1, opt.contrastive_hidden_size), requires_grad=True) + else: + self.background_embed = None + + + self.translator = translator + + self.disable_mid_caption_heads = opt.disable_mid_caption_heads + if self.disable_mid_caption_heads: + print('only calculate caption loss in the last decoding layer') + + self.pseudo_boxes = {} + + + def get_filter_rule_for_encoder(self): + filter_rule = lambda x: 'input_proj' in x \ + or 'transformer.encoder' in x \ + or 'transformer.level_embed' in x \ + or 'base_encoder' in x + return filter_rule + + def encoder_decoder_parameters(self): + filter_rule = self.get_filter_rule_for_encoder() + enc_paras = [] + dec_paras = [] + for name, para in self.named_parameters(): + if filter_rule(name): + print('enc: {}'.format(name)) + enc_paras.append(para) + else: + print('dec: {}'.format(name)) + dec_paras.append(para) + return enc_paras, dec_paras + + # def text_encoding(self, text_encoder_input): + # ''' + # Produce the text embedding for each caption + # :param text_encoder_input: a dict of input for text encoder + # ''' + # if self.opt.pretrained_language_model == 'UniVL' or self.opt.use_pseudo_box: + # # breakpoint() + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input, output_all_encoded_layers=True)[0][-1] + # text_embed = text_embed.to(dtype=dtype) # num_sentence, num_word, dim + # attention_mask = text_encoder_input['attention_mask'].unsqueeze(-1).to(dtype=dtype) # num_sentence, num_word, 1 + # attention_mask[:,0,:] = 0. 
# This operation follows from the UniVL + # text_embed = text_embed * attention_mask # num_sentence, num_word, dim + # text_embed = text_embed.sum(dim=1) / attention_mask.sum(dim=1) # num_sentence, dim + # raw_text_embed = text_embed + # # if video_name: + # # text_feature_path = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text' + # # np.save('{}/{}.npy'.format(text_feature_path, video_name), text_embed.detach().cpu().numpy()) + # text_embed = self.contrastive_projection_text[-1](text_embed) + + # else: + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input) + # text_embed = text_embed['pooler_output'].to(dtype=dtype) # num_sentence, dim + # text_embed = self.contrastive_projection_text[-1](text_embed) # num_sentence, dim_contrastive_learning + # # TODO: add more paradigm to generate the text_embedding + + # return text_embed, raw_text_embed + + def forward(self, dt, criterion, contrastive_criterion, eval_mode=False): + transformer_input_type = self.opt.transformer_input_type + vf = dt['video_tensor'] # (N, L, C) + mask = ~ dt['video_mask'] # (N, L) + duration = dt['video_length'][:, 1] + video_name = dt['video_key'][0][2:] + # text_encoder_input = dt['text_encoder_input'] if (self.opt.matcher_type=='DTW' or self.opt.use_pseudo_box) else None + N, L, C = vf.shape + # assert N == 1, "batch size must be 1."s + + srcs, masks, pos = self.base_encoder(vf, mask, duration) + + src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten = self.transformer.prepare_encoder_inputs( + srcs, masks, pos) + memory = self.transformer.forward_encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, + lvl_pos_embed_flatten, mask_flatten) + + two_stage, disable_iterative_refine, proposals, proposals_mask = decide_two_stage(transformer_input_type, + dt, criterion) + if two_stage: + if transformer_input_type == 'prior_proposals': + if self.opt.prior_manner == 'add': + #print('Insert the prior knowledge by adding the prior proposals to the query embed') + init_query_embed = self.query_embed.weight + _, tgt = torch.chunk(init_query_embed, 2, dim=1) + tgt = tgt.unsqueeze(0).expand(N, -1, -1) + init_reference, _, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_proposal( + proposals) + else: + if self.opt.use_anchor: + # tgt = self.tgt_embed.weight + anchor = self.anchor_embed.weight # num_queries, 2 + query_anchor = (self.query_embed, anchor) + proposals_mask = torch.ones(N, self.query_embed.shape[0], device=self.query_embed.device).bool() + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_anchor(memory, query_anchor) + else: + query_embed = self.query_embed.weight + proposals_mask = torch.ones(N, query_embed.shape[0], device=query_embed.device).bool() + 
init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_query(memory, + query_embed) + hs, inter_references = self.transformer.forward_decoder(tgt, reference_points, memory, temporal_shapes, + level_start_index, valid_ratios, query_embed, + mask_flatten, proposals_mask, disable_iterative_refine) + # hs: [num_decoder_layer, bs, num_query, feat_dim] + + # breakpoint() + # project to co-embedding space + if self.load_text_embed and eval_mode==False: + # text_embed, raw_text_embed = self.text_encoding(text_encoder_input) + # text_embed = [text_embed] * hs.shape[0] + # text_embed = torch.stack(text_embed, dim=0) + raw_text_embed = dt['cap_embed'] * hs.shape[0]# dt['caption_embedding'] returns a tuple(list) + # text_embed: [num_decoder_layer, num_sentence, contrastive_dim] + event_embed = torch.stack([self.contrastive_projection_event[i](hs_i) for i, hs_i in enumerate(hs)]) + text_embed = torch.stack([self.contrastive_projection_text[j](hs_j.cuda()) for j, hs_j in enumerate(raw_text_embed)]) + # breakpoint() + # event_embed: [num_decoder_layer, num_query, contrastive_dim] + else: + raw_text_embed = None + text_embed = None + event_embed = hs + # breakpoint() + if self.opt.use_pseudo_box and self.training: + # breakpoint() + # print('use pseudo box') + video_frame_num = dt['video_length'][:,0].cpu().numpy() # [feature_len, raw_video_len, video_len] + video_name = dt['video_key'][0] + if self.pseudo_boxes.get(video_name) is not None and 'box' in self.pseudo_boxes[video_name].keys() and 'loss' in self.pseudo_boxes[video_name].keys(): + # if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [self.pseudo_boxes[video_name]['box']] + + else: + if self.opt.pseudo_box_type == 'align': + video_step_segment = [segment_video_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + # elif self.opt.pseudo_box_type == 'similarity': + # video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + # breakpoint() + elif self.opt.pseudo_box_type == "similarity": + # breakpoint() + if self.opt.width_ratio < 0: + video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode) for i in range(N)] + else: + video_step_alignment = [align_frame_into_steps_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=False, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i 
in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v1': + video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v2': + video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [align_frame_into_steps_op_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'weight_sim': + if self.opt.width_ratio < 0: + video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + else: + # breakpoint() + video_step_alignment = [step_retrieval_weight_sim_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + + elif self.opt.pseudo_box_type == 'weight_index': + video_step_alignment = [step_retrieval_weight_index(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + elif self.opt.pseudo_box_type == 'modeframe': + video_step_alignment = [align_frame_into_steps_mode(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'uniform': + video_step_alignment = [uniform_box(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # breakpoint() + else: + raise NotImplementedError('pseudo_box_type {} is not implemented'.format(self.opt.pseudo_box_type)) + + + if self.opt.pseudo_box_type != 'align': + if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + # breakpoint() + video_step_alignment, loss_op = [out[0] for out in video_step_alignment], [out[1] for out in video_step_alignment] + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0], 'loss': loss_op[0].item()} + else: + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0]} + + if self.opt.pseudo_box_type != 'align': + bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + else: + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + bbox_alignment = to_center_duration(bbox_alignment) + + + for sample in range(len(dt['video_target'])): + dt['video_target'][sample]['boxes_pseudo'] = bbox_alignment[sample] + # dt['video_target'][sample]['boxes'] = bbox_alignment[sample] + # else: + # print('use gt box') + + 
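        # Worked sketch of the pseudo-box normalization above (hypothetical
+        # numbers): a (start, end) frame alignment is divided by the frame count
+        # to land in [0, 1], then mapped to (center, duration) for the matcher,
+        # assuming to_center_duration maps (s, e) -> ((s + e) / 2, e - s):
+        # >>> align = torch.tensor([[10., 40.], [50., 90.]])  # frame indices
+        # >>> boxes = align / 100.0                           # video_frame_num = 100
+        # >>> torch.stack([boxes.mean(-1), boxes[:, 1] - boxes[:, 0]], dim=-1)
+        # tensor([[0.2500, 0.3000],
+        #         [0.7000, 0.4000]])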
#breakpoint() + others = {'memory': memory, + 'mask_flatten': mask_flatten, + 'spatial_shapes': temporal_shapes, + 'level_start_index': level_start_index, + 'valid_ratios': valid_ratios, + 'proposals_mask': proposals_mask, + 'text_embed': text_embed, + 'event_embed': event_embed} + # breakpoint() + if eval_mode or self.opt.caption_loss_coef == 0: + out, loss = self.parallel_prediction_full(dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + if self.opt.refine_pseudo_box and self.opt.use_pseudo_box: + # print('refine') + out, loss = self.parallel_prediction_refine_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + # print('no refine') + out, loss = self.parallel_prediction_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + return out, loss + + def predict_event_num(self, counter, hs_lid): + hs_lid_pool = torch.max(hs_lid, dim=1, keepdim=False)[0] # [bs, feat_dim] + outputs_class0 = counter(hs_lid_pool) + return outputs_class0 + + def parallel_prediction_full(self, dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + ''' + hs: [decoder_layer, bs, num_query, feat_dim] + init_reference: [bs, num_query, 1] + inter_references: [decoder_layer, bs, num_query, 2] + ''' + outputs_classes = [] + outputs_classes0 = [] + outputs_coords = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + num_pred = hs.shape[0] + #breakpoint() + for l_id in range(hs.shape[0]): + if l_id == 0: + reference = init_reference + else: + reference = inter_references[l_id - 1] # [decoder_layer, batch, query_num, ...] 
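+            # The count head applied just below (via predict_event_num) classifies
+            # the number of events from max-pooled query features; at inference the
+            # argmax is clamped so at least one event is kept. Illustrative values:
+            # >>> count_logits = torch.tensor([[0.1, 0.3, 2.0, 0.2]])
+            # >>> count_logits.argmax(dim=-1).clamp(min=1).item()
+            # 2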
+ hs_lid = hs[l_id] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + output_count = self.predict_event_num(self.count_head[l_id], hs_lid) + n_pred_sentence = output_count.argmax(dim=-1).clamp(min=1).item() + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 4] + + # if self.opt.disable_mid_caption_heads and (l_id != hs.shape[0] - 1): + if l_id != hs.shape[0] - 1: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, 'none') + else: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type) # Only output caption in the last decoding layer + + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if self.opt.matcher_type == 'DTW': + assert reference.shape[-1] == 2 and tmp.shape[-1] == 2 + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :2] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 2] + + outputs_classes.append(outputs_class) + outputs_classes0.append(output_count) + outputs_coords.append(outputs_coord) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + output_count = torch.stack(outputs_classes0) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + + all_out = {'pred_logits': outputs_class, + 'pred_count': output_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs} + out = {k: v[-1] for k, v in all_out.items()} + + if self.aux_loss: + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + + # loss, _, _ = criterion(out, dt['video_target'], others) + return out, [] + + def parallel_prediction_refine_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + + outputs_classes = [] + outputs_counts = [] + outputs_coords = [] + outputs_cap_costs = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + cl_match_mats = [] + + num_pred = hs.shape[0] + if self.opt.pseudo_box_aug: + assert self.opt.use_pseudo_box + num_sentence = dt['gt_boxes'].size(-2) + assert num_sentence == len(dt['cap_raw'][0]) + if self.opt.pseudo_box_aug_num * num_sentence > self.opt.num_queries: + aug_num = self.opt.num_queries // num_sentence + else: + aug_num = self.opt.pseudo_box_aug_num + if self.opt.refine_pseudo_box: + ori_dt_cap_tensor = copy.deepcopy(dt['cap_tensor']) + ori_dt_cap_mask = copy.deepcopy(dt['cap_mask']) + cap_dim = dt['cap_tensor'].shape[-1] #(num_sen, num_max_word) + dt['cap_tensor'] = dt['cap_tensor'].repeat(1, aug_num).reshape(-1, cap_dim) + dt['cap_mask'] = dt['cap_mask'].repeat(1, aug_num).reshape(-1, cap_dim) + + for l_id in range(num_pred): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[ + l_id - 1] # [decoder_layer, batch, query_num, ...] 
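+            # The cap_tensor repeat(1, aug_num).reshape(-1, cap_dim) earlier in this
+            # method duplicates each caption row aug_num times in order, so every
+            # augmented pseudo box keeps its source sentence. E.g. with aug_num = 2:
+            # >>> caps = torch.tensor([[1, 2], [3, 4]])
+            # >>> caps.repeat(1, 2).reshape(-1, 2)
+            # tensor([[1, 2],
+            #         [1, 2],
+            #         [3, 4],
+            #         [3, 4]])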
+ outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + outputs_count = torch.stack(outputs_counts) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + # outputs_cap_loss = torch.stack(outputs_cap_losses) + + all_out = { + 'pred_logits': outputs_class, + 'pred_count': outputs_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs, + 'cl_match_mats': cl_match_mats} + out = {k: v[-1] for k, v in all_out.items()} + + + # ============================= Refine pseudo box here ================================ + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + mil_dict = {} + bag_score_cache = [] + for stage in range(self.opt.refine_pseudo_stage_num): + # Decay augment ratio as the stage increases + aug_ratio = self.opt.pseudo_box_aug_ratio * (0.5 ** stage) + _, last_indices, aux_indices = criterion(out, dt['video_target'], others, aug_num, aug_ratio) + # Only use the last decoder layer output to conduct the pseudo box refinement + hs_lid = hs[-1] + reference = inter_references[-1] #[1, num_query, 2] + indices = last_indices[0] # [tensor(): num_matched_query ,tensor(): num_matched_cap] + query_indices = indices[0][0] # the indices of matched query is 
ordered
+            cap_indices = indices[0][1]  # the indices of matched sentences are unordered
+            # breakpoint()
+            # num_sentence = cap_indices.size(0) // self.opt.pseudo_box_aug_num
+            cap_sort = torch.sort(cap_indices)[1]
+            reorder_query_indices = query_indices[cap_sort]
+            if self.opt.use_neg_pseudo_box:
+                neg_query_indices = []
+                neg_cap_indices = torch.arange(0, cap_indices.size(0), aug_num).view(num_sentence, -1).repeat(1, self.opt.num_neg_box).view(-1)
+                for i in range(num_sentence):
+                    # select some negative indices from the reordered query indices
+                    candidates_r = (reorder_query_indices[(i + 1) * aug_num:])
+                    candidates_l = (reorder_query_indices[:(i) * aug_num])
+                    if (candidates_r.size(0) > 0) and (candidates_l.size(0) > 0):
+                        candidates = torch.cat((candidates_r, candidates_l))
+                    else:
+                        candidates = candidates_r if candidates_r.size(0) > 0 else candidates_l
+                    if candidates.size(0) == 0:
+                        candidates = reorder_query_indices
+                    if candidates.size(0) < self.opt.num_neg_box:
+                        random_selected_indices = torch.randperm(candidates.size(0))
+                        padding_num = self.opt.num_neg_box - candidates.size(0)
+                        random_selected_indices = torch.cat((random_selected_indices, random_selected_indices[:padding_num]))
+                    else:
+                        random_selected_indices = torch.randperm(reorder_query_indices.size(0) - aug_num)[:self.opt.num_neg_box]
+                    neg_query_indices.append(candidates[random_selected_indices])
+                neg_query_indices = torch.cat(neg_query_indices)
+                neg_indices = [(neg_query_indices, neg_cap_indices)]
+            # query_indices: ordered, cap_indices: unordered
+            # ++++++ <1>. Produce the instance score and classification score
+            if self.opt.use_additional_cap_layer:
+                cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference,
+                                                                                      others, self.opt.caption_decoder_type, indices)
+                if (stage > 0) and self.opt.use_neg_pseudo_box:
+                    _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference,
+                                                                    others, self.opt.caption_decoder_type, neg_indices)
+            else:
+                cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference,
+                                                                                      others, self.opt.caption_decoder_type, indices)
+                if (stage > 0) and self.opt.use_neg_pseudo_box:
+                    _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference,
+                                                                    others, self.opt.caption_decoder_type, neg_indices)
+            # breakpoint()
+            # sentence_cap_prob: the caption probability for each matched query, torch.Size([num_matched_query])
+            if self.opt.use_additional_score_layer:
+                query_ins_score = self.class_refine_head[stage](hs_lid)[:, query_indices, :]
+            else:
+                query_ins_score = outputs_classes[-1][:, query_indices, :]  # [1, num_matched_query, 1]
+            query_pred_boxes = outputs_coord[-1][:, query_indices, :]  # [1, num_matched_query, 2]
+            query_pred_boxes = query_pred_boxes[0, :, :][cap_sort].view(-1, 2)  # [num_matched_query, 2]
+            # breakpoint()
+            try:
+                query_ins_score = query_ins_score[0, cap_sort, 0].view(-1, aug_num)  # [num_cap, num_aug]
+            except:
+                breakpoint()
+            if self.opt.norm_ins_score == 'softmax':
+                query_ins_score = torch.softmax(query_ins_score, dim=-1)
+            elif self.opt.norm_ins_score == 'sigmoid':
+                query_ins_score = query_ins_score.sigmoid()
+            else:
+                raise NotImplementedError
+
+            # breakpoint()
+            # sentence_cap_score = cap_probs['cap_prob_train']
+            temperature = 2
+            sentence_cap_prob = sentence_cap_prob[cap_sort].view(-1, aug_num)  # [num_cap, num_aug]
+            cap_len = torch.tensor([len(cap.split()) for cap in dt['cap_raw'][0]],
+            cap_len = torch.tensor([len(cap.split()) for cap in dt['cap_raw'][0]], device=sentence_cap_prob.device).unsqueeze(1)
+            sentence_cap_score = (sentence_cap_prob / cap_len) ** temperature + 1e-5
+
+            sentence_cap_score[torch.isinf(sentence_cap_score)] = 1e8
+
+            sentence_cap_score = sentence_cap_score.detach()
+            query_ins_score = query_ins_score.detach()
+
+            query_score = sentence_cap_score + query_ins_score
+            # if (stage == 0) or (self.opt.focal_mil == False):
+            #     sentence_cap_prob = torch.softmax(sentence_cap_prob, dim=-1)  # Softmax over the queries in the same bag
+            # else:
+            #     sentence_cap_prob = sentence_cap_prob.sigmoid()
+
+            # if self.opt.cap_prob_clip:
+            #     query_score = sentence_cap_prob.detach() * query_ins_score  # [num_cap, num_aug]
+            # else:
+            #     query_score = sentence_cap_prob * query_ins_score  # [num_cap, num_aug]
+
+            # ++++++ <2>. Calculate the MIL loss and the negative loss
+            bag_score = query_score.sum(dim=-1)  # [num_cap]
+            bag_score = bag_score.clamp(0, 1)
+            bag_score_cache.append(bag_score)
+            mil_weight = bag_score_cache[stage - 1] if self.opt.weighted_mil_loss else torch.ones_like(bag_score).to(bag_score.device)
+            if stage > 0:
+                if self.opt.focal_mil:
+                    focal_weight = (torch.ones_like(bag_score).to(bag_score.device) - bag_score).pow(2)
+                    mil_loss = - focal_weight * (bag_score + 1e-6).log()
+                    mil_loss = (mil_weight * mil_loss).mean()
+                else:
+                    mil_loss = - (mil_weight * bag_score.log()).mean()
+                if self.opt.use_neg_pseudo_box:
+                    neg_cap_prob = neg_cap_prob.sigmoid()
+                    neg_loss = - (neg_cap_prob.pow(2) * (1 - neg_cap_prob).log()).view(num_sentence, -1).mean(dim=-1)
+                    neg_loss = (mil_weight * neg_loss).mean()
+                    mil_loss += neg_loss
+            else:
+                mil_loss = F.binary_cross_entropy(bag_score, torch.ones_like(bag_score).to(bag_score.device))
+            if 'loss_mil' in mil_dict.keys():
+                mil_dict['loss_mil'] += mil_loss
+            else:
+                mil_dict['loss_mil'] = mil_loss
+            # ++++++ <3>. Merge the pseudo boxes to generate a new pseudo box
+            if self.opt.merge_criterion == 'cap_topk':
+                topk_pseudo_scores, topk_pseudo_indices = torch.topk(sentence_cap_score, k=self.opt.merge_k_boxes, dim=-1)  # [num_caption, k]
+            elif self.opt.merge_criterion == 'ins_topk':
+                topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_ins_score, k=self.opt.merge_k_boxes, dim=-1)
+            elif self.opt.merge_criterion == 'ins_cap_topk':
+                topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_score, k=self.opt.merge_k_boxes, dim=-1)  # [num_caption, k]
+            else:
+                raise NotImplementedError('merge_criterion {} is not implemented'.format(self.opt.merge_criterion))
+            topk_pseudo_scores = topk_pseudo_scores / (topk_pseudo_scores.sum(dim=-1, keepdim=True) + 1e-6)  # [num_caption, k]
+            weight = topk_pseudo_scores.unsqueeze(-1).repeat(1, 1, 2)  # [num_caption, k, 2]
+            for i in range(len(dt['video_target'])):
+                previous_pseudo_box = dt['video_target'][i]['box_pseudo_aug']  # [num_caption*num_aug, 2]
+                if self.opt.use_query_box_for_refine:
+                    # Use the query coordinates as part of the guidance for the refinement
+                    previous_pseudo_box = (previous_pseudo_box + query_pred_boxes) / 2
+                if self.opt.merge_mode == 'weighted_sum':
+                    # Merge the top-k boxes with a weighted sum
+                    selected_pseudo_box = torch.gather(previous_pseudo_box.view(-1, aug_num, 2), 1, topk_pseudo_indices.unsqueeze(-1).expand(-1, -1, previous_pseudo_box.size(-1)))  # [num_caption, k, 2]
+                    refined_pseudo_box = (weight * selected_pseudo_box).sum(dim=1).clamp(0, 1)  # [num_caption, 2]
+                    dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone()
+                    # ``targets_cp = copy.deepcopy(targets)`` in criterion.py raised
+                    # "RuntimeError: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment"
+                    # once the targets had been updated with the 'boxes_pseudo' key, so
+                    # refined_pseudo_box is detached here before being stored in the targets.
+                    # Commented by Huabin, 2023/9/14
+                elif self.opt.merge_mode == 'interpolate':
+                    # Generate the new box by linear interpolation between the previous pseudo box and the pseudo box with the max score
+                    max_pseudo_scores = topk_pseudo_scores[:, :1]
+                    max_coef = 0.5 * torch.ones_like(max_pseudo_scores).to(max_pseudo_scores.device)  # Upper bound on the coefficient for the box interpolation
+                    max_pseudo_box = torch.gather(previous_pseudo_box.view(-1, aug_num, 2), 1, topk_pseudo_indices[:, :1].unsqueeze(-1).expand(-1, -1, previous_pseudo_box.size(-1)))
+                    interpolate_coef = torch.min(max_pseudo_scores, max_coef)
+                    refined_pseudo_box = (1 - interpolate_coef) * previous_pseudo_box[(aug_num - 1)::aug_num, :] + interpolate_coef * max_pseudo_box.squeeze(1)
+                    refined_pseudo_box = refined_pseudo_box.clamp(0, 1)
+                    dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone()
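+            # Illustrative example of the weighted-sum merge above (made-up
+            # numbers): with merge_k_boxes=2, if a caption's normalized top-2
+            # scores are [0.75, 0.25] and its selected augmented (center, width)
+            # boxes are [0.40, 0.20] and [0.48, 0.28], the merged pseudo box is
+            # 0.75 * [0.40, 0.20] + 0.25 * [0.48, 0.28] = [0.42, 0.22].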
+        # ++++++ <4>. End of the refinement: restore dt['cap_tensor'] and dt['cap_mask'] to their original (un-repeated) values
+        dt['cap_tensor'] = ori_dt_cap_tensor
+        dt['cap_mask'] = ori_dt_cap_mask
+        mil_dict['loss_mil'] = mil_dict['loss_mil'] / self.opt.refine_pseudo_stage_num
+        criterion.pseudo_box_aug = False
+        # ================== End of refinement ========================================
+        if self.aux_loss:
+            ks, vs = list(zip(*(all_out.items())))
+            out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)]
+            loss, last_indices, aux_indices = criterion(out, dt['video_target'], others)
+            if self.opt.disable_rematch:
+                # Disable re-matching and directly use the max-score indices from the last refinement stage
+                selected_indices = query_score.argmax(dim=-1).unsqueeze(-1)
+                query_indices_in_refine = reorder_query_indices.to(selected_indices.device).view(-1, aug_num)
+                query_indices_in_refine = query_indices_in_refine.gather(1, selected_indices)
+                query_indices_in_refine, index_sort = torch.sort(query_indices_in_refine, 0)
+                cap_indices_in_refine = last_indices[0][0][1].sort()[0]
+                last_indices = [[(query_indices_in_refine.view(-1), cap_indices_in_refine[index_sort.view(-1)])], last_indices[1]]
+            loss.update(mil_dict)
+            criterion.pseudo_box_aug = True
+            for l_id in range(hs.shape[0]):
+                hs_lid = hs[l_id]
+                reference = init_reference if l_id == 0 else inter_references[l_id - 1]
+                indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0]
+                cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type, indices)
+                l_dict = {'loss_caption': cap_loss}
+                if l_id != hs.shape[0] - 1:
+                    l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()}
+                loss.update(l_dict)
+            out.update({'caption_probs': cap_probs, 'seq': seq})
+        else:
+            loss, last_indices = criterion(out, dt['video_target'], others)
+            criterion.pseudo_box_aug = True
+            l_id = hs.shape[0] - 1
+            reference = inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
+            hs_lid = hs[l_id]
+            indices = last_indices[0]
+            cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type, indices)
+            l_dict = {'loss_caption': cap_loss}
+            loss.update(l_dict)
+
+            out.pop('caption_losses')
+            out.pop('caption_costs')
+            out.update({'caption_probs': cap_probs, 'seq': seq})
+
+        return out, loss
+
+    def parallel_prediction_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others,
+                                    disable_iterative_refine, transformer_input_type='queries'):
+
+        outputs_classes = []
+        outputs_counts = []
+        outputs_coords = []
+        outputs_cap_costs = []
+        outputs_cap_losses = []
+        outputs_cap_probs = []
+        outputs_cap_seqs = []
+        cl_match_mats = []
+
+        num_pred = hs.shape[0]
+
+        if self.opt.pseudo_box_aug:
+            assert self.opt.use_pseudo_box
+            cap_dim = dt['cap_tensor'].shape[-1]  # (num_sen, num_max_word)
+            dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim)
+            dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim)
+
+        for l_id in range(num_pred):
+            hs_lid = hs[l_id]
+            reference = init_reference if l_id == 0 else inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
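+            # Each decoder layer has its own prediction heads; when iterative
+            # refinement is enabled, the box head output below is added to the
+            # inverse-sigmoid of this layer's reference points, so localization
+            # is refined layer by layer.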
+ outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or not self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + outputs_count = torch.stack(outputs_counts) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + # outputs_cap_loss = torch.stack(outputs_cap_losses) + + all_out = { + 'pred_logits': outputs_class, + 'pred_count': outputs_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs, + 'cl_match_mats': cl_match_mats} + out = {k: v[-1] for k, v in all_out.items()} + + if self.aux_loss: + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + if transformer_input_type == 'prior_proposals': + loss, _, _ = criterion(out, dt['video_target']) + # Random select an query from each segment + num_sentence = dt['cap_tensor'].shape[0] + num_query = hs.shape[-2] + num_query_interval = num_query // num_sentence + query_indices = [] + for i in range(num_sentence): + interval_min = i * num_query_interval + interval_max = interval_min + num_query_interval + sample = torch.randint(interval_min, interval_max, (hs.shape[0],)) + query_indices.append(sample) + query_indices = torch.cat(query_indices, dim=0) + gt_indices = torch.arange(num_sentence) + + 
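+            # With prior proposals the matcher is bypassed: the queries are divided
+            # into num_sentence uniform intervals, one query is drawn per interval
+            # for every decoder layer, and each drawn query is paired directly
+            # with its sentence index below.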
last_indices = ([(query_indices[::hs.shape[0]], gt_indices)], [None, None]) + aux_indices = [] + for l_id in range(hs.shape[0]-1): + aux_indices.append(([(query_indices[(l_id+1)::hs.shape[0]], gt_indices)], [None, None])) + else: + loss, last_indices, aux_indices = criterion(out, dt['video_target'], others) + for l_id in range(hs.shape[0]): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[l_id - 1] + indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + + l_dict = {'loss_caption': cap_loss} + if (self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim'): + contrastive_loss = contrastive_criterion( + text_embed = others['text_embed'][l_id], + event_embed = others['event_embed'][l_id], + matching_indices = indices, + bg_embed = self.background_embed, + ) + + l_dict.update({'contrastive_loss': contrastive_loss}) + if l_id != hs.shape[0] - 1: + l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()} + loss.update(l_dict) + out.update({'caption_probs': cap_probs, 'seq': seq}) + else: + loss, last_indices = criterion(out, dt['video_target'], others) + + l_id = hs.shape[0] - 1 + reference = inter_references[l_id - 1] # [decoder_layer, batch, query_num, ...] + hs_lid = hs[l_id] + indices = last_indices[0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + l_dict = {'loss_caption': cap_loss} + loss.update(l_dict) + + out.pop('caption_losses') + out.pop('caption_costs') + out.update({'caption_probs': cap_probs, 'seq': seq}) + + return out, loss + + def caption_prediction(self, cap_head, dt, hs, reference, others, captioner_type, indices=None): + N_, N_q, C = hs.shape + # all_cap_num = len(dt['cap_tensor']) + # if self.opt.pseudo_box_aug: + # assert self.opt.use_pseudo_box + # cap_dim = dt['cap_tensor'].shape[-1] # (num_sen, num_max_word) + # # breakpoint() + # if indices != None: + # breakpoint() + # dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + # dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + all_cap_num = len(dt['cap_tensor']) + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + if indices == None: + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx=row_idx + query_nums[i] + col_idx= col_idx + gt_nums[i] + + bigids = mix_mask.nonzero(as_tuple=False) + feat_bigids, cap_bigids = bigids[:, 0], bigids[:, 1] + else: + # breakpoint() + feat_bigids = torch.zeros(sum([len(_[0]) for _ in indices])).long() + cap_bigids = torch.zeros_like(feat_bigids) + total_query_ids = 0 + total_cap_ids = 0 + total_ids = 0 + max_pair_num = max([len(_[0]) for _ in indices]) + new_hr_for_dsa = torch.zeros(N_, max_pair_num, C) # only for lstm-dsa + cap_seq = dt['cap_tensor'] + new_seq_for_dsa = torch.zeros(N_, max_pair_num, cap_seq.shape[-1], dtype=cap_seq.dtype) # only for lstm-dsa + for i, index in enumerate(indices): + feat_ids, cap_ids = index + 
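+                # Offset the per-sample indices into flat "big" ids: query ids are
+                # shifted by the number of queries seen so far, caption ids by the
+                # number of ground-truth captions seen so far.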
feat_bigids[total_ids: total_ids + len(feat_ids)] = total_query_ids + feat_ids + cap_bigids[total_ids: total_ids + len(feat_ids)] = total_cap_ids + cap_ids + new_hr_for_dsa[i, :len(feat_ids)] = hs[i, feat_ids] + new_seq_for_dsa[i, :len(feat_ids)] = cap_seq[total_cap_ids + cap_ids] + total_query_ids += query_nums[i] + total_cap_ids += gt_nums[i] + total_ids += len(feat_ids) + # if self.opt.pseudo_box_aug: + # # Revise the matched targer ids for pseudo box augmentation to caption id + # cap_bigids = cap_bigids // self.opt.pseudo_box_aug_num + cap_probs = {} + flag = True + + if captioner_type == 'none': + cost_caption = torch.zeros(N_, N_q, all_cap_num, + device=hs.device) # batch_size * num_queries * all_caption_num + loss_caption = torch.zeros(N_, N_q, all_cap_num, device=hs.device) + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cost_caption, loss_caption, cap_probs, seq + + elif captioner_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + elif self.opt.caption_decoder_type == 'standard': + # breakpoint() + # assert N_ == 1, 'only support batchsize = 1' + if self.training: + # breakpoint() + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + if self.opt.refine_pseudo_box: # Only training and refine_pseudo_box = True returns the raw_cap_prob + cap_prob, raw_cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # shape: [num_sentence, max_num_word, num_vocab] + # cap_prob is log_softmax(prob), raw_cap_prob is (prob) + cap_probs['cap_prob_train'] = cap_prob + cap_probs['raw_cap_prob'] = raw_cap_prob + else: + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608], here 13 is the max length among 5 sentences + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, + dt['cap_tensor'][cap_bigids]) + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + flag = False + pass + + if flag: + clip_ext = clip[feat_bigids] + clip_mask_ext = clip_mask[feat_bigids] + + if self.training: + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq) + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + seq_gt = dt['cap_tensor'][cap_bigids] + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq_gt) + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + + if len(seq): + # re_seq = torch.zeros(N_, N_q, seq.shape[-1]) + # re_cap_prob_eval = torch.zeros(N_, N_q, cap_prob_eval.shape[-1]) + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + if self.opt.caption_cost_type == 'loss': + cap_prob = cap_prob.reshape(-1, cap_prob.shape[-2], cap_prob.shape[-1]) # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608] + caption_tensor = dt['cap_tensor'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + caption_mask = 
dt['cap_mask'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + cap_loss = cap_head.build_loss(cap_prob, caption_tensor, caption_mask) # [num_query] + cap_cost = cap_loss + else: + raise AssertionError('caption cost type error') + + # Calculate caption probs for each query + # breakpoint() + # if self.opt.refine_pseudo_box: + # sentence_cap_prob = cap_head.build_prob(raw_cap_prob, caption_tensor, caption_mask) + # else: + sentence_cap_prob = - cap_loss + + if indices: + return cap_loss.mean(), cap_probs, seq, sentence_cap_prob + # cap_loss.mean(): [num_matched_query] --> [1], + # cap_probs: dict, contains 'cap_prob_train' or 'cap_prob_eval' [num_matched_query, max_length_sentence, num_word_in_vocab] + # seq: [num_sentence, max_length_sentence+1], here the '+1' means the 1st col is all '0' + + cap_id, query_id = cap_bigids, feat_bigids + cost_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + cost_caption[query_id, cap_id] = cap_cost + loss_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + loss_caption[query_id, cap_id] = cap_loss + cost_caption = cost_caption.reshape(-1, N_q, + max(cap_id) + 1) # batch_size * num_queries * all_caption_num + loss_caption = loss_caption.reshape(-1, N_q, max(cap_id) + 1) + return cost_caption, loss_caption, cap_probs, seq + + def caption_prediction_eval(self, cap_head, dt, hs, reference, others, decoder_type, pred_num=None, indices=None): + assert indices == None + N_, N_q, C = hs.shape + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx = row_idx + query_nums[i] + col_idx = col_idx + gt_nums[i] + + cap_probs = {} + + if decoder_type in ['none']: + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cap_probs, seq + + elif decoder_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + elif decoder_type in ['standard']: + assert N_ == 1, 'only support batchsize = 1' + with torch.no_grad(): + if self.opt.transformer_input_type == 'prior_proposals': + # hs: [bs, num_query, feat_dim] + # reference: [bs, num_query, 2] + if pred_num: + num_cap = pred_num + else: + num_cap = dt['cap_tensor'].shape[0] + interval = N_q // num_cap + pool_layer = torch.nn.AvgPool1d(interval,stride=interval) + hs = pool_layer(hs.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # [batch, num_sentence, dim] + reference = pool_layer(reference.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # # [batch, num_sentence, 2] + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, num_cap, seq.shape[-1]) # + cap_prob_eval = cap_prob_eval.reshape(-1, num_cap, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + else: + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): 
+                        seq = seq.reshape(-1, N_q, seq.shape[-1])
+                        cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1])
+                        cap_probs['cap_prob_eval'] = cap_prob_eval
+        return cap_probs, seq
+
+
+class PostProcess(nn.Module):
+    """ This module converts the model's output into the format expected by the coco api"""
+
+    def __init__(self, opt):
+        super().__init__()
+        self.opt = opt
+
+    @torch.no_grad()
+    def forward(self, outputs, target_sizes, loader):
+        """ Perform the computation
+        Parameters:
+            outputs: raw outputs of the model
+            target_sizes: tensor of dimension [batch_size] containing the size of each video of the batch
+        """
+        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']
+        N, N_q, N_class = out_logits.shape
+        assert len(out_logits) == len(target_sizes)
+        prob = out_logits.sigmoid()  # batch, num_queries, 1
+
+        if self.opt.transformer_input_type == 'prior_proposals':
+            # topk_values = prob.view(N, N_q)
+            # topk_indexes = torch.arange(N_q, device=prob.device).unsqueeze(0).repeat(N, 1)
+            topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), outputs['seq'].shape[1], dim=1)
+        else:
+            topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), N_q, dim=1)
+        scores = topk_values
+        # topk_boxes = topk_indexes // out_logits.shape[2]
+        topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode='floor')
+        labels = topk_indexes % out_logits.shape[2]
+        boxes = box_ops.box_cl_to_xy(out_bbox)
+        raw_boxes = copy.deepcopy(boxes)
+        boxes[boxes < 0] = 0
+        boxes[boxes > 1] = 1
+        boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 2))
+
+        scale_fct = torch.stack([target_sizes, target_sizes], dim=1)
+        boxes = boxes * scale_fct[:, None, :]
+        seq = outputs['seq']  # [batch_size, num_queries, max_cap_len=30]
+        cap_prob = outputs['caption_probs']['cap_prob_eval']  # [batch_size, num_queries]
+        eseq_lens = outputs['pred_count'].argmax(dim=-1).clamp(min=1)
+
+        if len(seq):
+            mask = (seq > 0).float()
+            # cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float') / (
+            #         1e-5 + mask.sum(2).cpu().numpy().astype('float'))
+            cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float')
+            seq = seq.detach().cpu().numpy().astype('int')  # (eseq_batch_size, eseq_len, cap_len)
+            caps = [[loader.dataset.translator.rtranslate(s) for s in s_vid] for s_vid in seq]
+            if self.opt.transformer_input_type != 'prior_proposals':
+                # Re-arrange the caption order according to the logits
+                caps = [[caps[batch][idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+                cap_scores = [[cap_scores[batch, idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+        else:
+            bs, num_queries = boxes.shape[:2]
+            cap_scores = [[-1e5] * num_queries] * bs
+            caps = [[''] * num_queries] * bs
+
+        results = [
+            {'scores': s, 'labels': l, 'boxes': b, 'raw_boxes': rb, 'captions': c, 'caption_scores': cs, 'query_id': qid,
+             'vid_duration': ts, 'pred_seq_len': sl} for s, l, b, rb, c, cs, qid, ts, sl in
+            zip(scores, labels, boxes, raw_boxes, caps, cap_scores, topk_boxes, target_sizes, eseq_lens)]
+        return results
+
+
+class MLP(nn.Module):
+    """ Very simple multi-layer perceptron (also called FFN)"""
+
+    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+        super().__init__()
+        self.num_layers = num_layers
+        h = [hidden_dim] * (num_layers - 1)
+        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+
+    def forward(self, x):
+        for i, layer in enumerate(self.layers):
+            x =
F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + + +def build(args): + device = torch.device(args.device) + base_encoder = build_base_encoder(args) + # For text encoder when using DTW matcher + # if args.matcher_type == 'DTW' or args.use_pseudo_box: + # if args.pretrained_language_model == 'UniVL': + # print('Load pretrained UniVL model weights') + # text_encoder = load_pretrained_UniVL() + # else: + # for i in range(10): + # try: + # text_encoder = AutoModel.from_pretrained(args.pretrained_language_model, cache_dir=args.huggingface_cache_dir) + # break + # except: + # print('download error in AutoModel, retry...') + # time.sleep(1) + # else: + # text_encoder = None + + transformer = build_deforamble_transformer(args) + captioner = build_captioner(args) + + model = PDVC( + base_encoder, + transformer, + captioner, + num_classes=args.num_classes, + num_queries=args.num_queries, + num_feature_levels=args.num_feature_levels, + aux_loss=args.aux_loss, + with_box_refine=args.with_box_refine, + opt=args + ) + + matcher = build_matcher(args) + if args.matcher_type == 'DTW' and args.use_anchor: + weight_dict = {'loss_ce': args.cls_loss_coef, + 'loss_bbox': args.bbox_loss_coef, + 'loss_giou': args.giou_loss_coef, + 'loss_self_iou': args.self_iou_loss_coef, + 'loss_ref_rank': args.ref_rank_loss_coef, + 'loss_counter': args.count_loss_coef, + 'loss_caption': args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + else: + weight_dict = {'loss_ce': args.cls_loss_coef, + 'loss_bbox': args.bbox_loss_coef, + 'loss_giou': args.giou_loss_coef, + 'loss_counter': args.count_loss_coef, + 'loss_caption': args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + if args.refine_pseudo_box: + weight_dict.update({'loss_mil': args.mil_loss_coef}) + # TODO this is a hack + if args.aux_loss: + aux_weight_dict = {} + for i in range(args.dec_layers - 1): + aux_weight_dict.update({k + f'_{i}': v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + + losses = ['labels', 'boxes', 'cardinality'] + + if args.matcher_type == 'DTW' or args.matcher_type == 'Sim': + criterion = AlignCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = ContrastiveCriterion(temperature=args.contrastive_loss_temperature, + enable_cross_video_cl=args.enable_cross_video_cl, + enable_e2t_cl = args.enable_e2t_cl, + enable_bg_for_cl = args.enable_bg_for_cl) + contrastive_criterion.to(device) + else: + criterion = SetCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = None + + criterion.to(device) + postprocessors = {'bbox': PostProcess(args)} + + return model, criterion, contrastive_criterion, postprocessors + + diff --git a/yc2_univl/backup/pdvc/position_encoding.py b/yc2_univl/backup/pdvc/position_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb71befd6e4397bd4d5a30c7a43861cea158cc7 --- /dev/null +++ b/yc2_univl/backup/pdvc/position_encoding.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Various positional encodings for the transformer. +""" +import math +import torch +from torch import nn + +from misc.detr_utils.misc import NestedTensor + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + self.max_duration = 256 + self.duration_embed_layer = nn.Linear(self.max_duration, self.max_duration) + + def forward(self, tensor_list: NestedTensor): + x = tensor_list.tensors + mask = tensor_list.mask + duration = tensor_list.duration + assert mask is not None + not_mask = ~mask + x_embed = not_mask.cumsum(1, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + x_embed = (x_embed - 0.5) / (x_embed[:, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + # dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + dim_t = self.temperature ** (2 * (torch.div(dim_t, 2, rounding_mode='floor')) / self.num_pos_feats) + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + + dur_embed = self.duration_embedding(duration).reshape(-1,1,self.max_duration).expand_as(pos_x) + pos = torch.cat((pos_x, dur_embed), dim=2).permute(0, 2, 1) + return pos + + def duration_embedding(self, durations): + out = torch.zeros(len(durations), self.max_duration, device=durations.device) + durations = durations.int() + for ii in range(len(durations)): + out[ii, :durations[ii]] = 1 + out = self.duration_embed_layer(out) + return out + + + +def build_position_encoding(position_embedding, N_steps): + if position_embedding in ('v2', 'sine'): + # TODO find a better way of exposing other arguments + position_embedding = PositionEmbeddingSine(N_steps, normalize=True) + else: + raise ValueError(f"not supported {position_embedding}") + + return position_embedding diff --git a/yc2_univl/backup/pdvc/util.py b/yc2_univl/backup/pdvc/util.py new file mode 100644 index 0000000000000000000000000000000000000000..7e489c1bce356a96116e2c13fcabc1c84d132711 --- /dev/null +++ b/yc2_univl/backup/pdvc/util.py @@ -0,0 +1,72 @@ +import torch +import numpy as np + +# def find_center_index(array: np.ndarray) -> np.ndarray: +# """ +# Given a array with shape [steps, topk], find the center index between topk indexes +# which has the minimal average distance with other indexes. 
+
+#     Args:
+#     - array: numpy array representing the input array with shape [steps, topk]
+
+#     Returns:
+#     - center_indexes: numpy array of center indexes for each step
+#     """
+
+#     distances = np.sum(np.abs(array[:, np.newaxis, :] - array[:, :, np.newaxis]), axis=2)
+#     center_indexes = np.argmin(distances, axis=1)
+
+#     return center_indexes
+def find_center_value(arr):
+    # Compute pairwise distances between all values
+    distances = np.abs(arr[:, np.newaxis] - arr[np.newaxis, :])
+
+    # Sum the distances for each value
+    sum_distances = np.sum(distances, axis=1)
+
+    # Find the index of the value with the smallest summed distance
+    center_index = np.argmin(sum_distances)
+
+    # Get the center value
+    center_value = arr[center_index]
+
+    return center_value
+
+
+def compute_overlap(center_t, boundary_t, center_t_minus_1, boundary_t_minus_1):
+    """
+    Compute the overlap of the boundaries between time t and t-1 for each element in the arrays.
+
+    Args:
+    - center_t: numpy array representing the center at time t with shape [N,]
+    - boundary_t: numpy array representing the boundary at time t with shape [N, 1, candidates]
+    - center_t_minus_1: numpy array representing the center at time t-1 with shape [N,]
+    - boundary_t_minus_1: numpy array representing the boundary at time t-1 with shape [N, 1, 1]
+
+    Returns:
+    - overlap: numpy array representing the IoU overlap of the boundaries with shape [N, candidates]
+    """
+
+    boundary_t = boundary_t.squeeze(1)
+    boundary_t_minus_1 = boundary_t_minus_1.squeeze(1)
+    center_t = center_t[:, np.newaxis]
+    center_t_minus_1 = center_t_minus_1[:, np.newaxis]
+    # boundary_t_minus_1 = boundary_t_minus_1[:, np.newaxis]
+
+    # Calculate the start and end positions of the boundaries at time t and t-1
+    start_t = center_t - 0.5 * boundary_t
+    end_t = center_t + 0.5 * boundary_t
+    start_t_minus_1 = center_t_minus_1 - 0.5 * boundary_t_minus_1
+    end_t_minus_1 = center_t_minus_1 + 0.5 * boundary_t_minus_1
+
+    # Calculate the intersection and union of the boundaries
+    intersection = np.maximum(0, np.minimum(end_t, end_t_minus_1) - np.maximum(start_t, start_t_minus_1))
+    union = boundary_t + boundary_t_minus_1 - intersection
+
+    # Compute the overlap using the Intersection over Union (IoU) formula
+    overlap = intersection / union
+
+    return overlap
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/video_segmentation.py b/yc2_univl/backup/pdvc/video_segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfa7f74cf0a858fc7dc8a929638fc294fc9bfc13
--- /dev/null
+++ b/yc2_univl/backup/pdvc/video_segmentation.py
@@ -0,0 +1,976 @@
+import torch
+import numpy as np
+
+
+from pdvc.dp.exact_dp import drop_dtw, double_drop_dtw
+from pdvc.dp.dp_utils import compute_sim
+import statistics
+from sklearn.cluster import KMeans
+from pdvc.util import find_center_value, compute_overlap
+# from config import CONFIG
+
+''' configs of the original file '''
+config_eval_l2norm = True
+config_eval_keep_percentile = 0.48
+config_eval_fixed_drop_sim = -1
+
+
+'''
+Return value:
+frame features [num_frames, feature_dim] -> optimal_assignment [num_steps];
+-1 means no match, otherwise the index of the matched step/caption/query.
+'''
+# filter_threshold = 0.5
+
+def clip_array(arr, threshold):
+    clipped_arr = np.where(arr > threshold, arr, threshold)
+    return clipped_arr
+
+
+# def compute_filtered_indices(topk_indices_list, topk_values_list, scale=0.5):
+#     # center_indices = []
+#     # boundary_widths = []
+#     filtered_indices_list = []
+#     for topk_indices, topk_values in zip(topk_indices_list, topk_values_list):
+#         center_index = find_center_value(topk_indices)
+#         std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / sum(topk_values)) ** 0.5
+#         boundary_width = std_index * scale
+#         filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width]
+#         filtered_indices_list.append(filtered_indices)
+#         # center_indices.append(center_index)
+#         # boundary_widths.append(boundary_width)
+
+#     return filtered_indices_list
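+# The active compute_filtered_indices below keeps only the top-k frame indices
+# that lie within `threshold` similarity-weighted standard deviations of the
+# center index, e.g. with center 50 and a weighted std of 10, threshold=0.5
+# keeps indices in [45, 55]; the surviving min/max indices later form the box.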
+def compute_filtered_indices(topk_indices, topk_values, threshold=0.5):
+    center_index = find_center_value(np.array(topk_indices))
+    std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / (sum(topk_values) + 1e-5)) ** 0.5
+    boundary_width = std_index * threshold
+    filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width]
+    return filtered_indices
+
+def compute_bbox_loss(index_list, box, similarity_values):
+    left, right = box
+    distances = []
+
+    for i, index in enumerate(index_list):
+        if left <= index <= right:
+            distance = -min(index - left, right - index)
+        else:
+            distance = max(left - index, index - right)
+
+        weighted_distance = similarity_values[i] * distance
+        distances.append(weighted_distance)
+
+    return sum(distances)
+
+
+def remove_outliers(indices, threshold, mode, w):
+    # Calculate the center statistic of the indices according to `mode`
+    if mode == 'median':
+        median = statistics.median(indices)
+    elif mode == 'mode':
+        count_dict = {}
+        for p in range(min(indices), max(indices) + 1):
+            count = sum(1 for c in indices if p - w <= c <= p + w)
+            count_dict[p] = count
+
+        max_count = max(count_dict.values())
+        best_p_values = [p for p, count in count_dict.items() if count == max_count]
+        if len(best_p_values) % 2 == 0:
+            best_p_values.pop()
+
+        mode_value = statistics.median(best_p_values)
+
+    # The mean is needed for the standard deviation whatever the mode is
+    mean = sum(indices) / len(indices)
+    std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5
+
+    # Calculate the threshold for identifying outliers
+    threshold_value = threshold * std_dev
+
+    # Filter out indices that are far from the center
+    if mode == 'median':
+        filtered_indices = [i for i in indices if abs(i - median) <= threshold_value]
+    elif mode == 'mean':
+        filtered_indices = [i for i in indices if abs(i - mean) <= threshold_value]
+    elif mode == 'mode':
+        filtered_indices = [i for i in indices if abs(i - mode_value) <= threshold_value]
+    return filtered_indices
+
+
+def remove_outliers_v1(indices, threshold):
+    pass
+
+def get_mode(indices, w):
+    count_dict = {}
+    for p in range(min(indices), max(indices) + 1):
+        count = sum(1 for c in indices if p - w <= c <= p + w)
+        count_dict[p] = count
+
+    max_count = max(count_dict.values())
+    best_p_values = [p for p, count in count_dict.items() if count == max_count]
+    if len(best_p_values) % 2 == 0:
+        best_p_values.pop()
+
+    mode_value = statistics.median(best_p_values)
+    return mode_value
+
+def get_mode_box(sim, topk, w, ratio):  # topk is typically chosen as 20 and ratio as 1
+    ''' Note: the center is computed from only the top-k indices because they are more
+    trustworthy; once the center is fixed, the boundary search must use all of the
+    candidate indices. '''
+    avg_caption_length = sim.shape[1] // sim.shape[0]
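+    # Example of the mode-window center (illustrative numbers): with w=2 and top
+    # indices [3, 4, 5, 11, 12], positions 3-5 each cover {3, 4, 5} (count 3)
+    # while position 11 covers only {11, 12} (count 2), so the mode center is
+    # the median of {3, 4, 5} = 4; frames within `width` of it are kept below.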
+    sorted_idx = torch.argsort(-sim, dim=1)
+    top_indices = sorted_idx[:, :topk]
+    # top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True)
+    # top_indices_half = top_indices[:, :topk//2]
+    top_cap_indices = sorted_idx[:, :avg_caption_length]
+    width = int(ratio * avg_caption_length / 2)  # ratio is typically 1
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        mode_value = get_mode(top_indices[i].tolist(), w)
+        filtered_indices = [idx for idx in top_cap_indices[i].tolist() if abs(idx - mode_value) <= width]
+
+        # if len(filtered_indices) == 0:
+        #     filtered_indices = remove_outliers(sim[i].tolist(), top_indices[i].tolist(), 0.5, mode='median', w=w)
+        # if len(filtered_indices) == 0:
+        #     bbox.append([0, sim.shape[1] - 1])
+        #     continue
+        if len(filtered_indices) == 0:
+            bbox.append([mode_value - width, mode_value + width])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+def compute_threshold(data, threshold):
+    mean = sum(data) / len(data)
+    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
+    threshold_value = threshold * std_dev
+    return threshold_value
+
+
+# Use the similarity as the weight to find the center
+''' Find the center globally, then find the boundary locally:
+    1. find the center, using the similarity as the weight;
+    2. find the boundary around that center. '''
+def step_retrieval_weight_sim(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # Similarity summed along a sliding window of size 2w+1
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+        flag = 1
+    else:
+        flag = 0
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+
+    # Find the frame with the maximum sum for each step
+    _, step_center_frames = window_sums.max(dim=1)
+    step_center_frames = step_center_frames.squeeze()
+
+    if flag == 1:
+        step_center_frames = step_center_frames.unsqueeze(0).tolist()
+    else:
+        step_center_frames = step_center_frames.tolist()
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        threshold_value = compute_threshold(top_indices[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            bbox.append([step_center_frames[i] - w, step_center_frames[i] + w])
+        else:
+            bbox.append([w + min(filtered_indices), w + max(filtered_indices)])
+
+    return bbox
+
+''' TODO: get the right weight using the index '''
+def step_retrieval_weight_index(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_idx = torch.argsort(-similarity_matrix, dim=1)
+    # Similarity summed along a sliding window of size 2w+1
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+
+    # Find the frame with the maximum sum for each step
+    _,
step_center_frames = window_sums.max(dim=1) + step_center_frames = step_center_frames.squeeze().tolist() + + bbox = [] + for i in range(top_indices.shape[0]): + threshold_value = compute_threshold(top_indices[i].tolist(), threshold) + filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value] + bbox.append([w + min(filtered_indices), w + max(filtered_indices)]) + + return bbox + +def uniform_box(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'): + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + return uniform_boxes + + +def align_frame_into_steps(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + average_width = int(sim.shape[1] // sim.shape[0] / 2) + # frame_features, step_features = frame_features.cpu(), step_features.cpu() + # bbox = get_mode_box(sim, topk, w, ratio) + + top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True) + bbox = [] + for i in range(top_indices.shape[0]): + filtered_indices = remove_outliers(top_indices[i].tolist(), threshold, mode=mode, w=w) + if len(filtered_indices) < 2: + filtered_indices = remove_outliers(top_indices[i].tolist(), 2*threshold, mode=mode, w=w) + if len(filtered_indices) == 0: + bbox.append([top_indices[0] - average_width, top_indices[0] + average_width]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + return bbox + +# use optimization to compute pseudo boundary +def align_frame_into_steps_op(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1): + # frame_features: torch.Size([200, 768]) + augment_ratio_list = np.arange(0.5, 2, 0.1) + + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + # breakpoint() + # [#step, #frame] + similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + + num_steps, num_frames = similarity_matrix.shape + + # Select top-k frames for each caption [#step, #topk] + sorted_indices = np.argsort(similarity_matrix, axis=1) + # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] + # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + + # Compute center indexes [#step, 1] + + + # Update boundary width + initial_boundary_width = num_frames / num_steps # 1 + # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 + # overlap = np.zeros(num_steps) + + for i in range(num_iterations): + if i == 0 and not order: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + # # overlap_weight = 0 + else: + if i == 0: + segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int) + start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:] + start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames) + 
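+                # Ordered initialization: split the video uniformly into num_steps
+                # segments and widen each segment by initial_boundary_width * scale
+                # on both sides before the in-window top-k search below.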
boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1) + + filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)] + if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + else: + boundary_width_last = boundary_width.reshape(-1, 1, 1) + start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames) + + topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # 
boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + +# use optimization to compute pseudo boundary +def align_frame_into_steps_op_v1(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1): + # frame_features: torch.Size([200, 768]) + augment_ratio_list = np.arange(0.5, 2, 0.1) + + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + # breakpoint() + # [#step, #frame] + similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + + num_steps, num_frames = similarity_matrix.shape + + # Select top-k frames for each caption [#step, #topk] + sorted_indices = np.argsort(similarity_matrix, axis=1) + # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] + # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + + # Compute center indexes [#step, 1] + + + # Update boundary width + initial_boundary_width = num_frames / num_steps # 1 + # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 + # overlap = np.zeros(num_steps) + + for i in range(num_iterations): + if i == 0 and not order: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + # # overlap_weight = 0 + else: + if i == 0: + segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int) + start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:] + start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames) + boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1) + + filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)] + if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + else: + boundary_width_last = boundary_width.reshape(-1, 1, 1) + start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames) + + topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + 
topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + weight_distance = [clip_array(index_distance[i], 0.5 * boundary_width_candidates[i]) for i in range(len(topk_indices))] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] / weight_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + + + + + +# # use optimization to compute pseudo boundary +# def align_frame_into_steps_op_order(frame_features, step_features, topk=15, threshold=0.5, num_iterations=4, beta=1): +# # frame_features: torch.Size([200, 768]) +# augment_ratio_list = np.arange(0.5, 2, 0.1) + +# if step_features.shape[0] == 0: +# return -np.ones(frame_features.shape[0]) + +# # breakpoint() +# # [#step, #frame] +# similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + +# num_steps, num_frames = similarity_matrix.shape + +# # Select top-k frames for each caption [#step, #topk] +# 
top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] +# top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + +# # Compute center indexes [#step, 1] +# center_indexes = find_center_index(top_indices)[:, np.newaxis] + +# # Update boundary width +# initial_boundary_width = num_frames / num_steps # 1 +# # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 +# # overlap = np.zeros(num_steps) + +# for i in range(num_iterations): +# if i == 0: +# boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) +# # previous_index_center = None +# # # overlap_weight = 0 +# else: +# boundary_width_last = boundary_width.reshape(-1, 1, 1) +# previous_index_center = center_indexes +# # overlap_weight = 0.5 * np.sum(overlap) + +# boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + +# index_distance = np.abs(top_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + +# loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + +# if i == 0: +# loss = loss_sim # # [#step, #candidates] +# print('loss shape:', loss_sim.shape, loss.shape) +# else: +# # measure the overlap between boundaries given center and boundary width +# overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] +# loss = loss_sim + beta * overlap +# print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) +# # find the best boundary width +# # breakpoint() +# best_boundary_width = np.argmin(loss, axis=1) # [#step] +# # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) +# boundary_width = boundary_width_candidates[:,0][np.arange(num_steps), best_boundary_width] # [#step] +# # print(boundary_width.shape) + +# return center_indexes, boundary_width +# based on original code but change the method to compute center and std +def align_frame_into_steps_op_order_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + sorted_index = torch.argsort(-sim, dim=1) + top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])] + top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])] + + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + + iter_bbox_loss = {} + for iter in range(iteration): + if iter == 0: + refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio) + else: + refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio) # last bbox + + + # global: from all frames, local: from refined uniform boxes + + top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])] + + size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + if sum(size_local) < (topk-2) * len(size_local): + top_indices_list = top_indices_list_global + top_values_list = top_values_list_global + else: + top_indices_list = top_indices_list_local + 
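# the local pools were dense enough (at least topk-2 candidates per step on
+            # average), so keep the order-respecting, window-restricted top-k
+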
            top_values_list = top_values_list_local
+
+        # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+        bbox = []
+        for i in range(len(top_indices_list)):
+            filtered_indices = compute_filtered_indices(top_indices_list[i].tolist(), top_values_list[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+                if len(filtered_indices) == 0:
+                    bbox.append(uniform_boxes[i])
+                    continue
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+        # compute bbox loss
+        bbox_loss_list = [compute_bbox_loss(top_indices_list[i], bbox[i], top_values_list[i]) for i in range(len(top_indices_list))]
+        bbox_loss = sum(bbox_loss_list)
+        iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox}
+
+    # select the minimum bbox loss and bbox as output
+    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
+    min_loss = iter_bbox_loss[min_loss_iter]['loss']
+    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']
+
+
+    return (best_bbox, min_loss)
+
+def align_frame_into_steps_op_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_index = torch.argsort(-sim, dim=1)
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])]
+
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+
+    iter_bbox_loss = {}
+    for iter in range(iteration):
+        # if iter == 0:
+        #     refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+        # else:
+        #     refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio) # last bbox
+
+
+        # global: from all frames, local: from refined uniform boxes
+
+        # top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+        # top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])]
+
+        # size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+        # if sum(size_local) < (topk-2) * len(size_local):
+        #     top_indices_list = top_indices_list_global
+        #     top_values_list = top_values_list_global
+        # else:
+        #     top_indices_list = top_indices_list_local
+        #     top_values_list = top_values_list_local
+
+        # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+        bbox = []
+        for i in range(len(top_indices_list_global)):
+            filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+                if len(filtered_indices) == 0:
+                    bbox.append(uniform_boxes[i])
+                    continue
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+        # compute bbox loss
+        bbox_loss_list = [compute_bbox_loss(top_indices_list_global[i], bbox[i], top_values_list_global[i]) for i in range(len(top_indices_list_global))]
+        bbox_loss = sum(bbox_loss_list)
+        iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox}
+
+    # select the minimum bbox loss and bbox as output
+    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
+    min_loss = iter_bbox_loss[min_loss_iter]['loss']
+    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']
+
+
+    return (best_bbox, min_loss)
+
+
+
+# pseudo box 4: based on a fixed window; the results were poor, so it was abandoned
+def align_frame_into_steps_mode(frame_features, step_features, topk=15, w=2, ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+
+    bbox = get_mode_box(sim, topk, w, ratio)
+    return bbox
+
+def uniform_window(frame_num, step_num):
+    uniform_timestamps = torch.linspace(0, frame_num, step_num + 1)
+    uniform_timestamps = torch.round(uniform_timestamps).int().tolist()
+    bbox = []
+    for i in range(step_num):
+        bbox.append([uniform_timestamps[i], uniform_timestamps[i+1] - 1])
+
+    # window_size = frame_num // step_num
+    # bbox = []
+    # for i in range(step_num):
+    #     bbox.append([i * window_size, (i + 1) * window_size - 1])
+    # bbox[-1][1] = frame_num - 1
+    return bbox
+
+def expand_window(uniform_bbox, frame_num, step_num, ratio=1):
+    '''ratio: how far a gt box may drift from its uniform box; anything beyond this range is treated as impossible. The unit of ratio is the average length of one caption.'''
+    window_size = frame_num // step_num
+    refined_bbox = []
+    for bbox in uniform_bbox:
+        start = max(0, bbox[0] - ratio * window_size)
+        end = min(frame_num - 1, bbox[1] + ratio * window_size)
+        refined_bbox.append([start, end])
+    return refined_bbox
+
+# pseudo box 3: based on similarity, considering the order of steps
+def align_frame_into_steps_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    # old setting (index is wrong)
+    # # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])]
+    # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list]
+    # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+
+    sorted_index = torch.argsort(-sim, dim=1)
+    # global: from all frames, local: from refined uniform boxes
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk-2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in
range(len(top_indices_list)): + filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w) + if len(filtered_indices) == 0: + filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w) + if len(filtered_indices) == 0: + bbox.append(uniform_boxes[i]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + + return bbox + + + + +# based on pbox3, if ratio 1 has enough value, use it otherwise +def align_frame_into_steps_order_adapt(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio) + + # old setting (index is wrong) + # # frame_features, step_features = frame_features.cpu(), step_features.cpu() + # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])] + # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list] + # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True) + + sorted_index = torch.argsort(-sim, dim=1) + # global: from all frames, local: from refined uniform boxes + top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])] + top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + + size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + if sum(size_local) < (topk-1) * len(size_local): + flag = 0 + for i in range(4): + refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio+i*0.5) + top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + if sum(size_local) >= (topk-1) * len(size_local): + flag = 1 + break + if flag == 0: + top_indices_list = top_indices_list_global + else: + top_indices_list = top_indices_list_local + + else: + top_indices_list = top_indices_list_local + + # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])] + + bbox = [] + for i in range(len(top_indices_list)): + filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w) + if len(filtered_indices) == 0: + filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w) + if len(filtered_indices) == 0: + bbox.append(uniform_boxes[i]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + + return bbox + +def step_retrieval_weight_sim_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, ratio=1): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + # breakpoint() + + window_sums = torch.nn.functional.conv1d(sim.unsqueeze(1), torch.ones(1, 1, 2 
* w + 1)).squeeze()
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+
+
+    sorted_index = torch.argsort(-window_sums, dim=1) + w
+
+
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk-2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        threshold_value = compute_threshold(top_indices_list[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices_list[i].tolist() if abs(frame - top_indices_list[i][0]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            bbox.append([top_indices_list[i] - w, top_indices_list[i] + w])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
+
+# pseudo box 0: based on dtw
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0] # making it of shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits # the baseline picks an intermediate value from the similarity matrix as the drop cost: the value at which matching and dropping are considered equally acceptable
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0) # directly find the frame that best matches each step, so in principle it is a one-to-one matching
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1 # tuning the drop cost adjusts how strict the matching is
+    return optimal_assignment
+
+def align_query_into_steps(query_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(query_features.shape[0])
+
+    sim = compute_sim(step_features, query_features, config_eval_l2norm).cpu()
+    query_features, step_features = query_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0] # making it of shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits # the baseline picks an intermediate value from the similarity matrix as the drop cost: the value at which matching and dropping are considered equally acceptable
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0) # directly find the frame that best matches each step, so in principle it is a one-to-one matching
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, one_to_one=True, return_labels=True) - 1 # tuning the drop cost adjusts how strict the matching is
+    return optimal_assignment
+
+# matching between the video and the predicted slots at inference time
+def segment_video_into_slots(video_features, pred_steps):
+    sim = compute_sim(pred_steps, video_features, l2_norm=config_eval_l2norm).detach()
+    if config_eval_fixed_drop_sim == -1:
+        k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+        baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    else:
+        baseline_logit = torch.tensor(config_eval_fixed_drop_sim)
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]]) # making it of shape [1, N]
+    x_drop_costs = -baseline_logits.squeeze()
+    zx_costs = -sim
+
+    z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze()
+    zx_costs = zx_costs - z_drop_costs[0].reshape([1, 1])
+    z_drop_costs = z_drop_costs - z_drop_costs[0]
+    x_drop_costs = x_drop_costs - x_drop_costs[0]
+    segmentation = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy(), return_labels=True) - 1
+    return segmentation
+
+
+# get_index and alignment_to_boundary are used for the 'align'-based manner
+def get_index(alignment):
+    start_idx, end_idx = [], []
+    for i in range(len(alignment)):
+        if alignment[i] == -1:
+            if i != 0 and alignment[i-1] != -1:
+                end_idx.append(i-1)
+            continue
+        if i == 0:
+            start_idx.append(i)
+        elif alignment[i] != alignment[i-1]:
+            start_idx.append(i)
+            if alignment[i-1] != -1:
+                end_idx.append(i-1)
+        if i == len(alignment) - 1:
+            end_idx.append(i)
+    assert len(start_idx) == len(end_idx)
+    for s, e in zip(start_idx, end_idx):
+        assert alignment[s] <= alignment[e]
+    return start_idx, end_idx
+
+def alignment_to_boundary(alignment, video_frame_num):
+    start_idx, end_idx = get_index(alignment)
+    start_time = start_idx / video_frame_num
+    end_time = end_idx / video_frame_num
+    boundaries = list(zip(start_time, end_time))
+
+    return np.float32(np.stack(boundaries, axis=0))
+
+
+def to_center_duration(alignments):
+    new_alignments = []
+    for alignment in alignments:
+        start, end = alignment[:, 0], alignment[:, 1]
+        center = (start + end) / 2
+        duration = end - start
+        alignment[:, 0], alignment[:, 1] = center, duration
+        new_alignments.append(alignment)
+    return new_alignments
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/video_segmentation_ori.py b/yc2_univl/backup/pdvc/video_segmentation_ori.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d06e59f3b5a80fb4e8a765d20287175b03568d4
--- /dev/null
+++ b/yc2_univl/backup/pdvc/video_segmentation_ori.py
@@ -0,0 +1,127 @@
+import torch
+import numpy as np
+import statistics
+
+from pdvc.dp.exact_dp import drop_dtw
+from pdvc.dp.dp_utils import compute_sim
+import statistics
+from sklearn.cluster import KMeans
+
+
+config_eval_l2norm = True
+config_eval_keep_percentile = 0.48 # Calculated from the data
+config_eval_fixed_drop_sim = -1
+
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, l2_norm=True).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0] # making it of shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs,
drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]] + sim = sim.detach().cpu().numpy() + + if unordered: + max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0) + optimal_assignment[max_vals < baseline_logit.item()] = -1 + else: + optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1 + return optimal_assignment # [num_frames] + +def get_index(alignment): + start_idx, end_idx = [], [] + for i in range(len(alignment)): + if alignment[i] == -1: + if i != 0 and alignment[i-1] != -1: + end_idx.append(i-1) + continue + if i == 0: + start_idx.append(i) + elif alignment[i] != alignment[i-1]: + start_idx.append(i) + if alignment[i-1] != -1: + end_idx.append(i-1) + if i == len(alignment) - 1: + end_idx.append(i) + assert len(start_idx) == len(end_idx) + for s, e in zip(start_idx, end_idx): + assert alignment[s] <= alignment[e] + return start_idx, end_idx + +def get_index_update(alignment): + optimal_alignment = np.append(np.insert(alignment, 0, -1), -1) + diff_optimal_alignment = np.diff(optimal_alignment) + + optimal_alignment_end = optimal_alignment.copy() + optimal_alignment_end[optimal_alignment_end==-1] = max(optimal_alignment_end) + 1 + diff_optimal_alignment_end = np.diff(optimal_alignment_end) + + start_idx = np.where(diff_optimal_alignment>0)[0] + end_idx = np.where(diff_optimal_alignment_end>0)[0] - 1 + return start_idx, end_idx + +def alignment_to_boundary(alignment, video_frame_num): + start_idx, end_idx = get_index(alignment) + start_time = start_idx / video_frame_num + end_time = end_idx / video_frame_num + boundaries = list(zip(start_time, end_time)) + + return np.float32(np.stack(boundaries, axis=0)) + + +def to_center_duration(alignments): + new_alignments = [] + for alignment in alignments: + start, end = alignment[:, 0], alignment[:, 1] + center = (start + end) / 2 + duration = end - start + alignment[:, 0], alignment[:, 1] = center, duration + new_alignments.append(alignment) + return new_alignments + + +def remove_outliers(indices, threshold): + # Calculate the mean and standard deviation of the indices + median = statistics.median(indices) + mean = sum(indices) / len(indices) + std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5 + + # Calculate the threshold for identifying outliers + threshold_value = threshold * std_dev + + # Filter out indices that are far from the mean + filtered_indices = [i for i in indices if abs(i - median) <= threshold_value] + + return filtered_indices + + +def align_frame_into_steps(frame_features, step_features, unordered=False, k=15, threshold=0.5): + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, True).cpu() + frame_features, step_features = frame_features.cpu(), step_features.cpu() + + top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True) + bbox = [] + for i in range(top_indices.shape[0]): + filtered_indices = remove_outliers(top_indices[i].tolist(), threshold) + bbox.append([min(filtered_indices), max(filtered_indices)]) + return bbox + +if __name__ == '__main__': + # frame_features = torch.randn(100, 768) + # text_features = torch.randn(8, 768) + # alignment = segment_video_into_steps(frame_features, text_features) + # breakpoint() + arr = [-1,-1,0,1,2,2,2,-1,-1,3,4,4,-1,-1,5,5,5,-1,6,6,7,-1,-1, 8, 8, 9] + start, end = get_index(arr) + start_1, end_1 = get_index_update(arr) + # start = [2, 3, 4, 8, 9, 13, 16, 18] + # end = [2, 3, 5, 8, 10, 15, 17, 18] + 
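# illustrative check (added, not in the original): the vectorized get_index_update
+    # should reproduce get_index on this example; the commented expected values above
+    # appear to come from an earlier test array
+    assert list(start_1) == list(start) and list(end_1) == list(end)
+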
breakpoint() diff --git a/yc2_univl/backup/test.py b/yc2_univl/backup/test.py new file mode 100644 index 0000000000000000000000000000000000000000..e1dcf9d7be821a3db142566cb23914ea96f1c064 --- /dev/null +++ b/yc2_univl/backup/test.py @@ -0,0 +1,64 @@ +# from pdvc.video_segmentation import align_frame_into_steps_op +# import torch + +# # create two tensors +# frame = torch.rand(200, 768) +# steps = torch.rand(10, 768) + +# bboxs = align_frame_into_steps_op(frame, steps, order=False) +# # breakpoint() +# print('done!') + + +# ================================================================== +# import json + +# filepath = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1/info.json" +# with open(filepath, 'r') as f: +# data = json.load(f) + +# val_history = data['history']['val_result_history'] + +# metric_sum = {} +# metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +# for k, v in val_history.items(): +# metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) +# print(f"{k}: {metric_sum[k]}") + +# best_epoch = max(metric_sum, key=metric_sum.get) +# print(val_history[best_epoch]['eval_score']) +# # write the val_history to a file +# with open('val.log', 'w') as f: +# for k, v in val_history[best_epoch]['eval_score'].items(): +# f.write(f"{k}: {v}\n") +# # print(metric_sum) +# # breakpoint() +# print('done!') + +# ================================================================== +import os +import json +import sys +sys.path.append('/mnt/data/Gvlab/wuhao/code/dibs') +from misc.utils import create_logger +save_folder = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1" + +val_logger = create_logger(save_folder, 'val.log') +infos_path = os.path.join(save_folder, 'info.json') + +with open(infos_path, 'r') as f: + data = json.load(f) +val_history = data['history']['val_result_history'] + +metric_sum = {} +metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + +best_epoch = max(metric_sum, key=metric_sum.get) +best_val_score = val_history[best_epoch]['eval_score'] +val_logger.info(f"Best epoch: {best_epoch}") +print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) +val_logger.info('\nBest Model Performance:\n' + print_info) +val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) \ No newline at end of file diff --git a/yc2_univl/backup/train.py b/yc2_univl/backup/train.py new file mode 100644 index 0000000000000000000000000000000000000000..43c0c73fd63d66eb7055f913723dd086ab80d288 --- /dev/null +++ b/yc2_univl/backup/train.py @@ -0,0 +1,671 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid 
warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file[0]: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = 
create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + # if use mixlm model + saved_path = construct_save_path(opt) + + if 'mixlm' in saved_path: + # text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature') + mixlm_pbox_path = construct_save_path(opt, save_folder='test').replace('.json', '').replace('test/', '') + text_feature_folder_mixlm = os.path.join('/mnt/data/Gvlab/wuhao/code/tmp', 'mix_text_feature', mixlm_pbox_path) + os.makedirs(text_feature_folder_mixlm, exist_ok=True) + if 'clip' in save_folder or 'CLIP' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj') + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip' + elif 'univl' in save_folder or 'UniVL' in save_folder or 'Uni' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text') + text_feature_folder_puyu = '/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu' + + if not os.path.exists(saved_path): + llama2_pbox_path = saved_path.replace('mixlm', 'llama2') + puyu_pbox_path = saved_path.replace('mixlm', 'puyu') + with open(llama2_pbox_path, 'r') as f: + llama2_pbox = json.load(f) + with open(puyu_pbox_path, 'r') as f: + puyu_pbox = json.load(f) + + mixlm_pbox = {} + for video_key in llama2_pbox.keys(): + if llama2_pbox.get(video_key) is None and puyu_pbox.get(video_key) is None: + mixlm_pbox[video_key] = None + elif llama2_pbox.get(video_key) is None: + mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + elif puyu_pbox.get(video_key) is None: + mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']: + mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + with open(saved_path, 'w') as f: + json.dump(mixlm_pbox, f) + + with open(saved_path, 'r') as f: + mixlm_pbox = json.load(f) + with open('data/howto/captiondata/howto100m_train_puyu.json', 'r') as f: + meta_puyu = json.load(f) + with open('data/howto/captiondata/howto100m_train.json', 'r') as f: + meta_llama2 = json.load(f) + + meta_mixlm = {} + for video_key in mixlm_pbox.keys(): + if mixlm_pbox.get(video_key) is not None and (meta_llama2.get(video_key) is not None or meta_puyu.get(video_key) is not None): + if mixlm_pbox[video_key]['llm'] == 'llama2': + meta_mixlm[video_key] = meta_llama2[video_key] + llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy') + if not os.path.exists(llama2_feature_path): + continue + # if os.path.exists(llama2_feature_path): + # os.unlink(llama2_feature_path) + # if not os.path.exists(llama2_feature_path): + # os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy') + # if os.path.exists(soft_link_path): + # os.unlink(soft_link_path) + if not os.path.exists(soft_link_path): + # print(os.path.exists(soft_link_path), os.path.exists(llama2_feature_path)) + os.symlink(llama2_feature_path, soft_link_path) + # 
text_feature = np.load(llama2_feature_path)
+                # if text_feature.shape[0] != len(meta_llama2[video_key]['sentences']):
+                #     print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_llama2[video_key]['sentences'])} sentences found in meta file")
+            else:
+                meta_mixlm[video_key] = meta_puyu[video_key]
+                puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy')
+                if not os.path.exists(puyu_feature_path):
+                    continue
+
+                soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy')
+
+                # if os.path.exists(soft_link_path):
+                #     os.unlink(soft_link_path)
+                if not os.path.exists(soft_link_path):
+                    os.symlink(puyu_feature_path, soft_link_path)
+                # text_feature = np.load(puyu_feature_path)
+                # if text_feature.shape[0] != len(meta_puyu[video_key]['sentences']):
+                #     print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_puyu[video_key]['sentences'])} sentences found in meta file")
+        with open(os.path.join(save_folder, 'train_caption_mixlm.json'), 'w') as f:
+            json.dump(meta_mixlm, f)
+        opt.train_caption_file[0] = os.path.join(save_folder, 'train_caption_mixlm.json')
+        opt.text_feature_folder[0] = text_feature_folder_mixlm
+        # pass
+
+
+    if not opt.start_from:
+        backup_envir(save_folder, opt)
+        logger.info('backup environment completed!')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    print(opt.text_feature_folder)
+    print(opt.train_caption_file)
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_1 = PropSeqDataset(opt.train_caption_file[0],
+                                         [opt.visual_feature_folder[0]],
+                                         [opt.text_feature_folder[0]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset_2 = PropSeqDataset(opt.train_caption_file[1],
+                                         [opt.visual_feature_folder[1]],
+                                         [opt.text_feature_folder[1]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        train_dataset.translator = train_dataset_1.translator
+
+    else:
+        train_dataset = PropSeqDataset(opt.train_caption_file,
+                                       opt.visual_feature_folder,
+                                       opt.text_feature_folder,
+                                       opt.dict_file, True, 'gt',
+                                       opt)
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size,
+                              shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads,
collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # try to load saved pbox + if os.path.exists(saved_path): + try: + with open(saved_path, 'r') as f: + model.pseudo_boxes = json.load(f) + except: + # delete the bad file + os.remove(saved_path) + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = 
AutoTokenizer.from_pretrained(opt.pretrained_language_model)
+    #         break
+    #     except:
+    #         print('download error in AutoTokenizer, retry...')
+    #         time.sleep(1)
+
+    if opt.start_from:
+        optimizer.load_state_dict(model_pth['optimizer'])
+        lr_scheduler.step(epoch-1)
+
+    # print the args for debugging
+    print_opt(opt, model, logger)
+    print_alert_message('Start training!', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+    # breakpoint()
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # Epoch-level iteration
+    refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box)
+    pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug)
+
+    while True:
+        # if epoch > opt.start_refine_epoch:
+        #     opt.refine_pseudo_box = refine_pseudo_box_copy
+        #     opt.pseudo_box_aug = pseudo_box_aug_copy
+        #     criterion.refine_pseudo_box = refine_pseudo_box_copy
+        #     criterion.pseudo_box_aug = pseudo_box_aug_copy
+        #     model.opt = opt
+        # else:
+        #     opt.refine_pseudo_box = False
+        #     opt.pseudo_box_aug = False
+        #     criterion.refine_pseudo_box = False
+        #     criterion.pseudo_box_aug = False
+        #     model.opt = opt
+
+        if True:
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
+            pass
+
+        # breakpoint()
+        # Batch-level iteration
+        trained_samples = 0
+        for dt in tqdm(train_loader, disable=opt.disable_tqdm):
+            # if dt['video_key'][0] != 'LGArj9Do0xc':
+            #     continue
+            # # for fast debugging
+            if opt.test:
+                if trained_samples > 5:
+                    break
+                else:
+                    trained_samples += 1
+            # if trained_samples < 1714:
+            #     trained_samples += 1
+            #     continue
+            if opt.device=='cuda':
+                torch.cuda.synchronize(opt.device)
+            if opt.debug:
+                # each epoch contains fewer mini-batches for debugging
+                if (iteration + 1) % 5 == 0:
+                    iteration += 1
+                    break
+            iteration += 1
+
+            optimizer.zero_grad()
+            dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+            dt['video_target'] = [
+                {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in
+                dt['video_target']]
+
+            # Add text encoder
+            # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+            #     captions = list()
+            #     for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+            #         captions.extend(video_sents)
+            #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+            #     text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()}
+            #     # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])}
+            #     # len(text_encoder_input['input_ids']) = n * max_text_input_len
+            #     dt['text_encoder_input'] = text_encoder_input
+
+            # dt = collections.defaultdict(lambda: None, dt) # Commented to
+            try:
+                output, loss = model(dt, criterion, contrastive_criterion)
+            except Exception as e:
+                print(e)
+                print(dt['video_key'])
+                continue
+            final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
+            # breakpoint()
+            final_loss.backward()
+
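# clip the global gradient norm to opt.grad_clip before the optimizer step;
+            # together with the try/except above, this guards against unstable batches
+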
torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + # breakpoint() + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + 
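+            # in the 'overall' criterion above, 2./(1./Precision + 1./Recall) is the
+            # harmonic mean (F1) of the proposal Precision and Recall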
+ # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + + if epoch == 1 and model.pseudo_boxes is not None and 'mixlm' not in opt.train_caption_file[0]: + # save the pseudo boxes + pbox_save_path = construct_save_path(opt) + if not os.path.exists(pbox_save_path): + with open(pbox_save_path, 'w') as f: + json.dump(model.pseudo_boxes, f) + + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # save the pesudo box + + + + # # ===============================old code============================================== + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + # =================================new code========================================================= + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] 
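+            # best-epoch selection at the end of training: an unweighted sum of the five
+            # metrics above, recomputed from the saved info.json history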
+ for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) + + break + + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + # breakpoint() + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_fewshot.py b/yc2_univl/backup/train_fewshot.py new file mode 100644 index 0000000000000000000000000000000000000000..d35b3feefc80f1a87e4fb30394702c28d04472d6 --- /dev/null +++ b/yc2_univl/backup/train_fewshot.py @@ -0,0 +1,482 @@ +# use ft_gt_percent to control the percentage of gt proposals used for finetuning + +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', 
'/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def seed_worker(worker_id):
+    worker_seed = torch.initial_seed() % 2**32
+    np.random.seed(worker_seed)
+    random.seed(worker_seed)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+    # unmapped paths are only acceptable if they are already local (/mnt/data);
+    # otherwise abort (same rule as map_path in train.py)
+    if path == path_backup:
+        if path.startswith('/mnt/data'):
+            pass
+        else:
+            print('map failed')
+            exit(1)
+    return path
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_folder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder, opt)
+        logger.info('backup environment completed!')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_1 = PropSeqDataset(opt.train_caption_file[0],
+                                         [opt.visual_feature_folder[0]],
+                                         [opt.text_feature_folder[0]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset_2 = PropSeqDataset(opt.train_caption_file[1],
+                                         [opt.visual_feature_folder[1]],
+                                         [opt.text_feature_folder[1]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        train_dataset.translator = train_dataset_1.translator
+
+    else:
+        train_dataset_target = PropSeqDataset(opt.train_caption_file,
+                                              opt.visual_feature_folder,
+                                              opt.text_feature_folder,
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_dataset = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size,
+                              shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration =
saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + 
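# --------------------------------------------------------------------------
# A minimal, self-contained sketch of the MultiStepLR schedule built above.
# The concrete numbers (decay_start=8, decay_every=3, total_epochs=20,
# gamma=0.5) are illustrative assumptions, not values from any cfg in this repo.
import torch

decay_start, decay_every, total_epochs, gamma = 8, 3, 20, 0.5
milestones = [decay_start + decay_every * i
              for i in range(int((total_epochs - decay_start) / decay_every))]
assert milestones == [8, 11, 14, 17]  # LR is multiplied by gamma at each one

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=gamma)
for epoch in range(total_epochs):
    optimizer.step()   # in real training, forward/backward happen here
    scheduler.step()   # after epoch 17 the LR has been halved four times
# --------------------------------------------------------------------------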
print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # if dt['video_key'][0] != 'LGArj9Do0xc': + # continue + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + # if trained_samples < 1714: + # trained_samples += 1 + # continue + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + 
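# --------------------------------------------------------------------------
# Sketch of the loss bookkeeping used just below: per-term sums accumulate
# every iteration and are averaged and flushed every `losses_log_every`
# iterations (a tenth of an epoch in the script). The loop and loss values
# here are stand-ins for real training iterations.
from collections import OrderedDict
import numpy as np

loss_sum = OrderedDict()
losses_log_every = 10  # the script uses int(len(train_loader) / 10)
for iteration in range(1, 31):
    loss = {'loss_ce': 0.7, 'loss_bbox': 0.3}  # stand-in batch losses
    for k, v in loss.items():
        loss_sum[k] = loss_sum.get(k, 0) + v
    if iteration % losses_log_every == 0:
        averaged = {k: np.round(v / losses_log_every, 3).item()
                    for k, v in loss_sum.items()}
        print(iteration, averaged)   # e.g. 10 {'loss_ce': 0.7, 'loss_bbox': 0.3}
        loss_sum = OrderedDict()     # reset the window, as the script does
# --------------------------------------------------------------------------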
if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': 
eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_ft.py b/yc2_univl/backup/train_ft.py new file mode 100644 index 0000000000000000000000000000000000000000..bdcc497f763607f28dfb1e0a687705c42e448a09 --- /dev/null +++ b/yc2_univl/backup/train_ft.py @@ -0,0 +1,513 @@ +# coding:utf-8 + +''' +train_seq2.py is different from train_seq.py in the following aspects: + +1. 
train_seq2.py uses the same dataset for pretraining and the target task +2. the pretrain dataset and the target dataset are not trained one after another within a single epoch: the pretrain dataset is trained for 10 epochs, then the target dataset for 20 epochs +3. the vocabulary is always the same for the pretrain and target tasks, i.e. the combined vocabulary of both +4. checkpoints are located in save as howto_yc2_* or howto_tasty_* +5. cfgs use howto-tasty_tasty_* or howto-yc2_yc2_* +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # map every known A100 prefix to its 3090 counterpart; fail loudly if nothing matched + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + if path == path_backup: + print('map failed') + exit(1) + return path + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only supports the howto-XXX_XXX settings') + exit(1) + + if not
os.path.exists(checkpoint_folder): + print('the checkpoint folder does not exist') + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, 
num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if 
opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + 
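# --------------------------------------------------------------------------
# A toy rendering of the weighted loss aggregation in the loop above: only
# terms present in criterion.weight_dict contribute to the backward pass;
# everything else is logged but never optimized. The tensors and weights
# below are made-up stand-ins.
import torch

weight_dict = {'loss_ce': 1.0, 'loss_bbox': 5.0}
loss = {'loss_ce': torch.tensor(0.7, requires_grad=True),
        'loss_bbox': torch.tensor(0.2, requires_grad=True),
        'loss_monitor_only': torch.tensor(9.9)}  # not in weight_dict

final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
assert abs(float(final_loss) - (0.7 * 1.0 + 0.2 * 5.0)) < 1e-6
final_loss.backward()  # gradients flow only through the weighted terms
# --------------------------------------------------------------------------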
logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': 
loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq2-ft' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_ft2_gt.py b/yc2_univl/backup/train_ft2_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..b767f5c2525ed10b6551ba02a5551bafe0f1737e --- /dev/null +++ b/yc2_univl/backup/train_ft2_gt.py @@ -0,0 +1,588 @@ +# coding:utf-8 + +''' +similar to train_ft_gt.py. it fine-tunes the model on the target dataset with ground-truth annotations. but the pretrain data includes both pretrain and target data (only use captions) + +set pretrain_data_mode to 'single', it is same as train_ft_gt.py. 
+ +Pretrain on the full howto subset data, then fine-tune on a portion of the gt data. +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': +
checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'vlep-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('vlep-yc2_yc2', 'vlep_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + # breakpoint() + + if opt.id_ori != '': + checkpoint_folder = checkpoint_folder + '_' + opt.id_ori + # breakpoint() + # if opt.id == "": + # pass + # else: + # checkpoint_folder = checkpoint_folder + '_' + opt.id + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_es20'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder, opt) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + 
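# --------------------------------------------------------------------------
# Illustration of how the fine-tuning scripts recover the pretraining
# checkpoint folder from the current save-folder name for each
# pretrain_data_mode. The folder name below is a made-up example in the same
# naming style, not a real run.
import re

save_folder = 'save/howto-yc2_yc2_base_seq2-ft(mix)-gt_percent-0.1'
mix = re.sub(r"_seq2-ft.*", "", save_folder)
seq = re.sub(r"_seq2-ft.*", "_seq-train", save_folder)
single = re.sub(r"_seq2-ft.*", "",
                save_folder.replace('howto-yc2_yc2', 'howto_yc2'))
assert mix == 'save/howto-yc2_yc2_base'
assert seq == 'save/howto-yc2_yc2_base_seq-train'
assert single == 'save/howto_yc2_base'
# --------------------------------------------------------------------------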
train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + # print('the script only support two dataset for pretrain and target task respectively') + # exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + # train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not 
encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions 
= list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding 
reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = 
json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] + for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) + + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id_ori = opt.id + + + opt.id = 'seq2-ft({})-gt_percent-{}'.format(pretrain_data_mode, opt.ft_gt_percent) + if opt.id_ori != '': + opt.id = opt.id + '_' + opt.id_ori + assert opt.ft_gt_percent <= 1.0 and opt.ft_gt_percent >= 0.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_ft_gt.py b/yc2_univl/backup/train_ft_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..b481c6eb9a19299b401fbe8ce82d10716a846a7c --- /dev/null +++ b/yc2_univl/backup/train_ft_gt.py @@ -0,0 +1,516 @@ +# coding:utf-8 + +''' +train_seq2.py is different from train_seq.py in the following aspects: + +1. train_seq2.py uses the same dataset for pretraining and target task +2. the pretrain dataset and target dataset is not trained one after another in a single epoch. train pretrain dataset for 10 epochs then train target dataset for 20 epochs +3. the vocabulary is always the same for pretrain and target task i.e. combined vocabulary of pretrain and target task +4. checkpoint is located in save howto_yc2_* or howto_tasty_* +5. 
cfg use howto-tasty_tasty_* or howto-yc2_yc2_* +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_floder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + 
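# The scan above walks suffixed re-runs (checkpoint_folder_1 ... _99) and keeps the first one + # whose val.log exists, i.e. the first fully trained run; reaching this branch means a suffix + # was missing before any trained run was found, so the script gives up below. +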
print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + 
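# The .get() calls above and below always fall back to their defaults (epoch 0, iteration 0, + # best_val_score -1e5) in this script, since saved_info is freshly initialised and the resume + # branch is commented out. +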
lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + 
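# NOTE (explanatory, values assumed): with learning_rate_decay_start=8, learning_rate_decay_every=3 + # and opt.epoch=20, milestone = [8, 11, 14, 17], and MultiStepLR multiplies the LR by + # learning_rate_decay_rate at each milestone. Also note that optimizer.load_state_dict() above + # restores the checkpoint's param-group LR, overriding the opt.lr * 0.5 set in training_params. +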
print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + 
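# Each logged value is the accumulated sum divided by losses_log_every + # (= int(len(train_loader_target) / 10), roughly ten log points per epoch), + # i.e. a windowed mean of the per-iteration losses; loss_sum is reset right after. +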
lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch 
>= opt.epoch: + # load the best checkpoint and run the final evaluation + print('====== Final Evaluation of the Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best model was saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score at iter {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq2-ft-gt_percent-{}'.format(opt.ft_gt_percent) + assert 0.0 <= opt.ft_gt_percent <= 1.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid the duplicate-OpenMP-runtime error on macOS + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_pre_ft_gt.py b/yc2_univl/backup/train_pre_ft_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..9440eb8b4b86d2123a997285686e704425519a3f --- /dev/null +++ b/yc2_univl/backup/train_pre_ft_gt.py @@ -0,0 +1,537 @@ +# coding:utf-8 + +''' +Similar to train_ft_gt.py: it fine-tunes the model on the target dataset with ground-truth annotations, but the pretraining data includes both the pretraining and the target data (captions only). + +Setting pretrain_data_mode to 'single' makes this script behave the same as train_ft_gt.py.
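+ +pretrain_data_mode options: 'mix' resumes from a checkpoint pretrained on both the pretraining and target captions, 'single' resumes from one pretrained on the pretraining data only (matching train_ft_gt.py), and 'seq' survives only as commented-out code below. + +Checkpoint selection: when no caption decoder is used, the validation score is the harmonic mean (F1) of localization Precision and Recall; e.g. P=0.6, R=0.3 gives 2/(1/0.6 + 1/0.3) = 0.4. + +The fine-tuning scripts subsample the target split with PercentageSubsetDataset (data/video_dataset.py). Its implementation is not part of this diff; the minimal sketch below shows the assumed behaviour (class body, index policy and attribute names are assumptions, not the actual code): + + class PercentageSubsetDataset(torch.utils.data.Dataset): + # expose only the leading `percent` fraction of `dataset` + def __init__(self, dataset, percent): + self.dataset = dataset + self.num_kept = int(len(dataset) * percent) + def __getitem__(self, i): + return self.dataset[i] + def __len__(self): + return self.num_kept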
+ + +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = 
re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_test'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder, opt) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = 
PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + # breakpoint() + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = 
list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # 
len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = 
{'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + opt.id = 'seq2-pre-{}-ft({})-gt'.format(opt.pre_percent, pretrain_data_mode) + assert opt.pre_percent <= 1.0 and opt.pre_percent >= 0.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in 
opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_pre_perc.py b/yc2_univl/backup/train_pre_perc.py new file mode 100644 index 0000000000000000000000000000000000000000..15f50480e382fc5704c5a6e019594b9478bcca11 --- /dev/null +++ b/yc2_univl/backup/train_pre_perc.py @@ -0,0 +1,593 @@ +# coding:utf-8 +''' +cfgs is the same as train.py, but need add an extra argument: pre_percent +recommend value: 0.1, 0.2, 0.4, 0.6, 0.8, 1 +''' + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file[0]: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + 
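+ # single-caption-file HowTo100M case: tag the cache name with the LLM that produced the captions +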
if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + # if use mixlm model + saved_path = construct_save_path(opt) + + if 'mixlm' in saved_path: + text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature') + os.makedirs(text_feature_folder_mixlm, exist_ok=True) + if 'clip' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj') + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip' + elif 'univl' in save_folder: + text_feature_folder_llama2 = '/mnt/data/Gvlab/wuhao/features/howto100m/univl_features' + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/features/howto100m/univl_features' + + if not os.path.exists(saved_path): + llama2_pbox_path = saved_path.replace('mixlm', 'llama2') + puyu_pbox_path = saved_path.replace('mixlm', 'puyu') + with open(llama2_pbox_path, 'r') as f: + llama2_pbox = json.load(f) + with open(puyu_pbox_path, 'r') as f: + puyu_pbox = json.load(f) + + mixlm_pbox = {} + for video_key in llama2_pbox.keys(): + if llama2_pbox[video_key] is None and puyu_pbox[video_key] is None: + mixlm_pbox[video_key] = None + else: + if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']: + mixlm_pbox[video_key] = {'pbox': llama2_pbox[video_key]['pbox'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + mixlm_pbox[video_key] = {'pbox': puyu_pbox[video_key]['pbox'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + with open(saved_path, 'w') as f: + json.dump(mixlm_pbox, f) + + with open(saved_path, 'r') as f: + mixlm_pbox = json.load(f) + for video_key in mixlm_pbox.keys(): + if mixlm_pbox[video_key] is not None: + if mixlm_pbox[video_key]['llm'] == 'llama2': + llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy') + os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + else: + puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy') + os.symlink(puyu_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + opt.text_feature_folder[0] = text_feature_folder_mixlm + + if 
not opt.start_from: + backup_envir(save_folder, opt) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_subdata = PercentageSubsetDataset(train_dataset_1, opt.pre_percent) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_subdata, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_all = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_dataset = PercentageSubsetDataset(train_dataset_all, opt.pre_percent) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained 
model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = 
(epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, 
+            model.eval()
+            result_json_path = os.path.join(save_folder, 'prediction',
+                                            'num{}_epoch{}.json'.format(len(val_dataset), epoch))
+            # eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path,
+                                     logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            if opt.caption_decoder_type == 'none':
+                current_score = 2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])
+            else:
+                if opt.criteria_for_best_ckpt == 'dvc':
+                    current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+                elif opt.criteria_for_best_ckpt == 'overall':
+                    current_score = np.array(eval_score['Bleu_4']).mean() + \
+                                    np.array(eval_score['CIDEr']).mean() + \
+                                    np.array(eval_score['METEOR']).mean() + \
+                                    2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])
+                else:
+                    current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            # add to tf summary
+            for key in eval_score.keys():
+                tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration)
+
+            # Huabin commented this part to avoid reporting losses during evaluation
+            # for loss_type in eval_loss.keys():
+            #     tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration)
+
+            _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info)
+            logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score))
+            val_result_history[epoch] = {'eval_score': eval_score}
+            logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path))
+
+            # save the model parameters of the best epoch
+            if current_score >= best_val_score:
+                best_val_score = current_score
+                best_epoch = epoch
+                saved_info['best'] = {'opt': vars(opt),
+                                      'iter': iteration,
+                                      'epoch': best_epoch,
+                                      'best_val_score': best_val_score,
+                                      'result_json_path': result_json_path,
+                                      'avg_proposal_num': eval_score['avg_proposal_number'],
+                                      'Precision': eval_score['Precision'],
+                                      'Recall': eval_score['Recall']
+                                      }
+
+                # suffix = "RL" if sc_flag else "CE"
+                torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth'))
+                logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration))
+
+            saved_info['last'] = {'opt': vars(opt),
+                                  'iter': iteration,
+                                  'epoch': epoch,
+                                  'best_val_score': best_val_score,
+                                  }
+            saved_info['history'] = {'val_result_history': val_result_history,
+                                     'loss_history': loss_history,
+                                     'lr_history': lr_history,
+                                     # 'query_matched_fre_hist': query_matched_fre_hist,
+                                     }
+            with open(os.path.join(save_folder, 'info.json'), 'w') as f:
+                json.dump(saved_info, f)
+            logger.info('Save info to info.json')
+
+            model.train()
+
+        epoch += 1
+        lr_scheduler.step()
+        opt.current_lr = optimizer.param_groups[0]['lr']
+        torch.cuda.empty_cache()
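For reference, a small sketch of the checkpoint-selection scores computed above; F1 is the harmonic mean of localization precision and recall, and the 'overall' criterion stacks the captioning means on top of it:

import numpy as np

def f1(precision, recall):
    return 2. / (1. / precision + 1. / recall)

def overall_score(eval_score):
    # mirrors the 'overall' branch above
    return (np.mean(eval_score['Bleu_4']) + np.mean(eval_score['CIDEr'])
            + np.mean(eval_score['METEOR']) + f1(eval_score['Precision'], eval_score['Recall']))

# f1(0.45, 0.31) ≈ 0.367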
+        # Stop criterion
+        if epoch >= opt.epoch:
+            # # load Best model and conduct evaluation
+            # print('====== Conduct the Final Evaluation to test Best Checkpoint ======')
+            # val_logger = create_logger(save_folder, 'val.log')
+            # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda')
+            # model.load_state_dict(loaded_pth['model'], strict=True)
+            # model.eval()
+            # result_json_path = saved_info['best']['result_json_path']
+            # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            # if opt.caption_decoder_type == 'none':
+            #     current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall'])
+            # else:
+            #     if opt.criteria_for_best_ckpt == 'dvc':
+            #         current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+            #     else:
+            #         current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter']))
+            # val_logger.info('\nBest Model Performance:\n' + print_info)
+            # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score))
+
+            # tf_writer.close()
+            # break
+
+            val_logger = create_logger(save_folder, 'val.log')
+            infos_path = os.path.join(save_folder, 'info.json')
+
+            with open(infos_path, 'r') as f:
+                data = json.load(f)
+                val_history = data['history']['val_result_history']
+
+            metric_sum = {}
+            metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall']
+            for k, v in val_history.items():
+                metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics])
+                # print(f"{k}: {metric_sum[k]}")
+
+            best_epoch = max(metric_sum, key=metric_sum.get)
+            best_val_score = val_history[best_epoch]['eval_score']
+            val_logger.info(f"Best epoch: {best_epoch}")
+            print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()])
+            val_logger.info('\nBest Model Performance:\n' + print_info)
+            val_logger.info('\nBest Overall Score (epoch {}): {}\n'.format(best_epoch, metric_sum[best_epoch]))
+
+            break
+
+    return saved_info
+
+
+if __name__ == '__main__':
+    opt = opts.parse_opts()
+    opt.id = 'seq-pre_perc-{}'.format(opt.pre_percent)
+    assert 0.0 <= opt.pre_percent <= 1.0
+
+    if not hasattr(opt, 'visual_feature_folder_val'):
+        opt.visual_feature_folder_val = opt.visual_feature_folder
+        opt.text_feature_folder_val = opt.text_feature_folder
+    # breakpoint()
+    if opt.map:
+        opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder]
+        opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder]
+        opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macos
+    # breakpoint()
+    train(opt)
+
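The post-hoc selection in the stop branch can also be run standalone against a saved info.json; a minimal sketch (the path argument is illustrative):

import json

def best_epoch_from_info(info_path='info.json',
                         metrics=('METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall')):
    # re-rank the saved epochs by the same metric sum used at the end of training
    with open(info_path) as f:
        history = json.load(f)['history']['val_result_history']
    scores = {ep: sum(v['eval_score'][m] for m in metrics) for ep, v in history.items()}
    return max(scores, key=scores.get), scores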
diff --git a/yc2_univl/backup/train_seq.py b/yc2_univl/backup/train_seq.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a415e180bf2506f1cbef5ce6d0f6f4205e76203
--- /dev/null
+++ b/yc2_univl/backup/train_seq.py
@@ -0,0 +1,457 @@
+# coding:utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def _init_fn(worker_id):
+    np.random.seed(12 + worker_id)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
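map_path rewrites the A100 cluster prefixes to their 3090 mirrors and aborts on unknown prefixes; a quick sanity check against the tables above (the feature subdirectory is illustrative):

src = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features'
assert map_path(src) == '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features'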
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_floder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0],
+                                                [opt.visual_feature_folder[0]],
+                                                [opt.text_feature_folder[0]],
+                                                opt.dict_file, True, 'gt',
+                                                opt)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file[1],
+                                              [opt.visual_feature_folder[1]],
+                                              [opt.text_feature_folder[1]],
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size,
+                                           shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+        train_dataloaders = [train_loader_pretrain, train_loader_target]
+        # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        # train_dataset.translator = train_dataset_1.translator
+
+    else:
+        train_dataset_target = PropSeqDataset(opt.train_caption_file,
+                                              opt.visual_feature_folder,
+                                              opt.text_feature_folder,
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_dataloaders = [train_loader_target]
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0)
+    best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5)
+    val_result_history = saved_info['history'].get('val_result_history', {})
+    loss_history = saved_info['history'].get('loss_history', {})
+    lr_history = saved_info['history'].get('lr_history', {})
+    opt.current_lr = vars(opt).get('current_lr', opt.lr)
+
+    # Build model
+    model, criterion, contrastive_criterion, postprocessors = build(opt)
+    model.translator = train_dataset_target.translator
+    model.train()
+
+    # Recover the parameters
+    if opt.start_from and (not opt.pretrain):
+        if opt.start_from_mode == 'best':
+            model_pth = torch.load(os.path.join(save_folder, 'model-best.pth'))
+        elif opt.start_from_mode == 'last':
+            model_pth = torch.load(os.path.join(save_folder, 'model-last.pth'))
+        logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration))
+        model.load_state_dict(model_pth['model'])
+
+    # Load the pre-trained model
+    if opt.pretrain and (not opt.start_from):
+        logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path))
+        model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device))
+        # query_weight = model_pth['model'].pop('query_embed.weight')
+        if opt.pretrain == 'encoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            encoder_pth = {k: v for k, v in model_pth['model'].items() if encoder_filter(k)}
+            model.load_state_dict(encoder_pth, strict=True)
+        elif opt.pretrain == 'decoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            decoder_pth = {k: v for k, v in model_pth['model'].items() if not encoder_filter(k)}
+            model.load_state_dict(decoder_pth, strict=True)
+        elif opt.pretrain == 'full':
+            # model_pth = transfer(model, model_pth)
+            model.load_state_dict(model_pth['model'], strict=True)
+        else:
+            raise ValueError("wrong value of opt.pretrain")
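The 'encoder'/'decoder' branches above load only a filtered slice of the checkpoint; a generic sketch of that pattern (the `keep` predicate is a stand-in for model.get_filter_rule_for_encoder(), and strict=False is used here so the unfiltered remainder is tolerated):

import torch

def load_filtered(model, ckpt_path, keep=lambda name: name.startswith('transformer.encoder')):
    # keep only the checkpoint entries whose parameter names pass `keep`
    state = torch.load(ckpt_path, map_location='cpu')['model']
    subset = {k: v for k, v in state.items() if keep(k)}
    model.load_state_dict(subset, strict=False)
    return sorted(subset)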
+
+    model.to(opt.device)
+
+    # Decide which parameters need to be trained
+    # if (opt.matcher_type == 'DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen':
+    #     for _, p in model.text_encoder.named_parameters():
+    #         p.requires_grad = False
+    #     text_encoder_params = list(map(id, model.text_encoder.parameters()))
+    #     other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters())
+    # else:
+    #     other_params = model.parameters()
+    other_params = model.parameters()
+
+    training_params = [{'params': other_params, 'lr': opt.lr}]
+
+    if opt.optimizer_type == 'adam':
+        optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay)
+    elif opt.optimizer_type == 'adamw':
+        optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay)
+
+    milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in
+                 range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))]
+    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate)
+
+    # Load tokenizer for text encoder
+    # for i in range(10):
+    #     try:
+    #         if opt.pretrained_language_model == 'UniVL':
+    #             tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+    #         else:
+    #             tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model)
+    #         break
+    #     except:
+    #         print('download error in AutoTokenizer, retry...')
+    #         time.sleep(1)
+
+    if opt.start_from:
+        optimizer.load_state_dict(model_pth['optimizer'])
+        lr_scheduler.step(epoch - 1)
+
+    # print the args for debugging
+    print_opt(opt, model, logger)
+    print_alert_message('Start training !', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # Epoch-level iteration
+    while True:
+        if True:
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
+
+        # breakpoint()
+        # Batch-level iteration
+        for train_loader in train_dataloaders:
+            trained_samples = 0
+            for dt in tqdm(train_loader, disable=opt.disable_tqdm):
+                # # for fast debugging
+                # if trained_samples > 25:
+                #     break
+                # else:
+                #     trained_samples += 1
+                if opt.device == 'cuda':
+                    torch.cuda.synchronize(opt.device)
+                if opt.debug:
+                    # each epoch contains fewer mini-batches for debugging
+                    if (iteration + 1) % 5 == 0:
+                        iteration += 1
+                        break
+                iteration += 1
+
+                optimizer.zero_grad()
+                dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+                dt['video_target'] = [
+                    {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()}
+                    for vid_info in dt['video_target']]
+
+                # Add text encoder
+                # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+                #     captions = list()
+                #     for video_sents in dt['cap_raw']:  # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+                #         captions.extend(video_sents)
+                #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+                #     text_encoder_input = {key:
_.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + 
str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-train' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = 
[map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macos
+    # breakpoint()
+    train(opt)
+
diff --git a/yc2_univl/backup/train_seq_gt.py b/yc2_univl/backup/train_seq_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..235ae3a83169787f2b2db87e71f0fabe2dbc2dc1
--- /dev/null
+++ b/yc2_univl/backup/train_seq_gt.py
@@ -0,0 +1,480 @@
+# coding:utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def _init_fn(worker_id):
+    np.random.seed(12 + worker_id)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_floder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
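The resume branch below overwrites the fresh options with the saved run's, keeping only the resume-control keys; a compact sketch of that merge, assuming plain dicts:

def merge_resume_opts(opt_dict, prev_opt, exclude=('start_from', 'start_from_mode', 'pretrain')):
    # overwrite current options with the saved run's and collect the diffs for logging
    changed = {}
    for k, v in prev_opt.items():
        if k not in exclude and opt_dict.get(k) != v:
            changed[k] = (opt_dict.get(k), v)
            opt_dict[k] = v
    return changed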
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0],
+                                                [opt.visual_feature_folder[0]],
+                                                [opt.text_feature_folder[0]],
+                                                opt.dict_file, True, 'gt',
+                                                opt)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file[1],
+                                              [opt.visual_feature_folder[1]],
+                                              [opt.text_feature_folder[1]],
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        # Create the dataset with the specified percentage
+        subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
+
+        # # Create a DataLoader for the subset dataset
+        # subset_dataloader = DataLoader(subset_data, batch_size=64, shuffle=True)
+
+        train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size,
+                                           shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+        train_dataloaders = [train_loader_pretrain, train_loader_target]
+        # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        # train_dataset.translator = train_dataset_1.translator
+
+    else:
+        print(f'this script only supports two-dataset training, but {len(opt.visual_feature_folder)} dataset folders were provided')
+        exit(1)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file,
+                                              opt.visual_feature_folder,
+                                              opt.text_feature_folder,
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_dataloaders = [train_loader_target]
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0)
+    best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5)
+    val_result_history = saved_info['history'].get('val_result_history', {})
+    loss_history = saved_info['history'].get('loss_history', {})
+    lr_history = saved_info['history'].get('lr_history', {})
+    opt.current_lr = vars(opt).get('current_lr', opt.lr)
+
+    # Build model
+    model, criterion, contrastive_criterion, postprocessors = build(opt)
+    model.translator = train_dataset_target.translator
+    model.train()
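PercentageSubsetDataset keeps only opt.ft_gt_percent of the target annotations; a minimal stand-in built on torch.utils.data.Subset, assuming a deterministic prefix split (the real class may sample differently):

from torch.utils.data import Subset

def percentage_subset(dataset, percent):
    assert 0.0 <= percent <= 1.0
    keep = max(1, int(len(dataset) * percent))
    return Subset(dataset, range(keep))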
+
+    # Recover the parameters
+    if opt.start_from and (not opt.pretrain):
+        if opt.start_from_mode == 'best':
+            model_pth = torch.load(os.path.join(save_folder, 'model-best.pth'))
+        elif opt.start_from_mode == 'last':
+            model_pth = torch.load(os.path.join(save_folder, 'model-last.pth'))
+        logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration))
+        model.load_state_dict(model_pth['model'])
+
+    # Load the pre-trained model
+    if opt.pretrain and (not opt.start_from):
+        logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path))
+        model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device))
+        # query_weight = model_pth['model'].pop('query_embed.weight')
+        if opt.pretrain == 'encoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            encoder_pth = {k: v for k, v in model_pth['model'].items() if encoder_filter(k)}
+            model.load_state_dict(encoder_pth, strict=True)
+        elif opt.pretrain == 'decoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            decoder_pth = {k: v for k, v in model_pth['model'].items() if not encoder_filter(k)}
+            model.load_state_dict(decoder_pth, strict=True)
+        elif opt.pretrain == 'full':
+            # model_pth = transfer(model, model_pth)
+            model.load_state_dict(model_pth['model'], strict=True)
+        else:
+            raise ValueError("wrong value of opt.pretrain")
+
+    model.to(opt.device)
+
+    # Decide which parameters need to be trained
+    # if (opt.matcher_type == 'DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen':
+    #     for _, p in model.text_encoder.named_parameters():
+    #         p.requires_grad = False
+    #     text_encoder_params = list(map(id, model.text_encoder.parameters()))
+    #     other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters())
+    # else:
+    #     other_params = model.parameters()
+    other_params = model.parameters()
+
+    training_params = [{'params': other_params, 'lr': opt.lr}]
+
+    if opt.optimizer_type == 'adam':
+        optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay)
+    elif opt.optimizer_type == 'adamw':
+        optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay)
+
+    milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in
+                 range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))]
+    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate)
+
+    # Load tokenizer for text encoder
+    # for i in range(10):
+    #     try:
+    #         if opt.pretrained_language_model == 'UniVL':
+    #             tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+    #         else:
+    #             tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model)
+    #         break
+    #     except:
+    #         print('download error in AutoTokenizer, retry...')
+    #         time.sleep(1)
+
+    if opt.start_from:
+        optimizer.load_state_dict(model_pth['optimizer'])
+        lr_scheduler.step(epoch - 1)
+
+    # print the args for debugging
+    print_opt(opt, model, logger)
+    print_alert_message('Start training !', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # Epoch-level iteration
+    while True:
+        if True:
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
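The batch loop below alternates supervision between the two passes of each epoch: pseudo boxes for the HowTo100M loader, ground-truth boxes for the target loader. A minimal sketch of that switch, assuming the [pretrain, target] loader order built above:

def set_supervision_mode(opt, criterion, use_pseudo_box):
    # route the matcher to pseudo boxes (HowTo100M) or GT boxes (YouCook2/Tasty)
    opt.use_pseudo_box = use_pseudo_box
    criterion.opt = opt
    criterion.matcher.use_pseudo_box = use_pseudo_box

# for is_pretrain_pass, loader in zip((True, False), train_dataloaders):
#     set_supervision_mode(opt, criterion, is_pretrain_pass)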
+
+        # breakpoint()
+        # Batch-level iteration
+        opt.use_pseudo_box = False  # True for howto, False for yc2/tasty
+        opt.pseudo_box_aug = False
+        opt.refine_pseudo_box = False
+        # breakpoint()
+
+        for train_loader in train_dataloaders:
+            opt.use_pseudo_box = not opt.use_pseudo_box
+            criterion.opt = opt
+            criterion.matcher.use_pseudo_box = opt.use_pseudo_box
+
+            # if opt.use_pseudo_box:
+            #     print('howto dataset')
+            # else:
+            #     print('target dataset')
+            trained_samples = 0
+            for dt in tqdm(train_loader, disable=opt.disable_tqdm):
+                # # for fast debugging
+                # if trained_samples > 25:
+                #     break
+                # else:
+                #     trained_samples += 1
+                if opt.device == 'cuda':
+                    torch.cuda.synchronize(opt.device)
+                if opt.debug:
+                    # each epoch contains fewer mini-batches for debugging
+                    if (iteration + 1) % 5 == 0:
+                        iteration += 1
+                        break
+                iteration += 1
+
+                optimizer.zero_grad()
+                dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+                dt['video_target'] = [
+                    {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()}
+                    for vid_info in dt['video_target']]
+
+                # Add text encoder
+                # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+                #     captions = list()
+                #     for video_sents in dt['cap_raw']:  # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+                #         captions.extend(video_sents)
+                #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+                #     text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()}
+                #     # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])}
+                #     # len(text_encoder_input['input_ids']) = n * max_text_input_len
+                #     dt['text_encoder_input'] = text_encoder_input
+
+                # dt = collections.defaultdict(lambda: None, dt)  # Commented to
+
+                output, loss = model(dt, criterion, contrastive_criterion)
+                final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
+                # breakpoint()
+                final_loss.backward()
+                torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
+
+                optimizer.step()
+
+                for loss_k, loss_v in loss.items():
+                    loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item()
+                loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item()
+
+                if opt.device == 'cuda':
+                    torch.cuda.synchronize()
+
+                losses_log_every = int(len(train_loader) / 10)
+
+                if opt.debug:
+                    losses_log_every = 6
+
+                if iteration % losses_log_every == 0:
+                    end = time.time()
+                    for k in loss_sum.keys():
+                        loss_sum[k] = np.round(loss_sum[k] / losses_log_every, 3).item()
+
+                    logger.info(
+                        "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}"
+                        .format(opt.id, iteration, epoch, loss_sum,
+                                (end - start) / losses_log_every, bad_video_num))
+
+                    tf_writer.add_scalar('lr', opt.current_lr, iteration)
+                    for loss_type in loss_sum.keys():
+                        tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration)
+                    loss_history[iteration] = loss_sum
+                    lr_history[iteration] = opt.current_lr
+                    loss_sum = OrderedDict()
+                    start = time.time()
+                    bad_video_num = 0
+                    torch.cuda.empty_cache()
+
+        # evaluation
+        if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save):
+
+            # Save model
+            saved_pth = {'epoch': epoch,
+                         'model': model.state_dict(),
+                         'optimizer': optimizer.state_dict()}
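final_loss above is the weight_dict-masked sum of the criterion's loss dict: entries without a configured weight are logged but not optimized. A tiny sketch of that reduction:

import torch

def weighted_total(loss, weight_dict):
    return sum(loss[k] * weight_dict[k] for k in loss if k in weight_dict)

# weighted_total({'loss_giou': torch.tensor(0.5), 'aux': torch.tensor(9.0)}, {'loss_giou': 4}) -> tensor(2.)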
+
+            if opt.save_all_checkpoint:
+                checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration))
+            else:
+                checkpoint_path = os.path.join(save_folder, 'model-last.pth')
+
+            torch.save(saved_pth, checkpoint_path)
+
+            model.eval()
+            result_json_path = os.path.join(save_folder, 'prediction',
+                                            'num{}_epoch{}.json'.format(len(val_dataset), epoch))
+            # eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path,
+                                     logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            if opt.caption_decoder_type == 'none':
+                current_score = 2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])
+            else:
+                if opt.criteria_for_best_ckpt == 'dvc':
+                    current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+                else:
+                    current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            # add to tf summary
+            for key in eval_score.keys():
+                tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration)
+
+            # Huabin commented this part to avoid reporting losses during evaluation
+            # for loss_type in eval_loss.keys():
+            #     tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration)
+
+            _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info)
+            logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score))
+            val_result_history[epoch] = {'eval_score': eval_score}
+            logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path))
+
+            # save the model parameters of the best epoch
+            if current_score >= best_val_score:
+                best_val_score = current_score
+                best_epoch = epoch
+                saved_info['best'] = {'opt': vars(opt),
+                                      'iter': iteration,
+                                      'epoch': best_epoch,
+                                      'best_val_score': best_val_score,
+                                      'result_json_path': result_json_path,
+                                      'avg_proposal_num': eval_score['avg_proposal_number'],
+                                      'Precision': eval_score['Precision'],
+                                      'Recall': eval_score['Recall']
+                                      }
+
+                # suffix = "RL" if sc_flag else "CE"
+                torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth'))
+                logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration))
+
+            saved_info['last'] = {'opt': vars(opt),
+                                  'iter': iteration,
+                                  'epoch': epoch,
+                                  'best_val_score': best_val_score,
+                                  }
+            saved_info['history'] = {'val_result_history': val_result_history,
+                                     'loss_history': loss_history,
+                                     'lr_history': lr_history,
+                                     # 'query_matched_fre_hist': query_matched_fre_hist,
+                                     }
+            with open(os.path.join(save_folder, 'info.json'), 'w') as f:
+                json.dump(saved_info, f)
+            logger.info('Save info to info.json')
+
+            model.train()
+
+        epoch += 1
+        lr_scheduler.step()
+        opt.current_lr = optimizer.param_groups[0]['lr']
+        torch.cuda.empty_cache()
+        # Stop criterion
+        if epoch >= opt.epoch:
+            # load Best model and conduct evaluation
+            print('====== Conduct the Final Evaluation to test Best Checkpoint ======')
+            val_logger = create_logger(save_folder, 'val.log')
+            loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda')
+            model.load_state_dict(loaded_pth['model'],
strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-gt_percent_{}'.format(opt.ft_gt_percent) + assert opt.ft_gt_percent <= 1.0 and opt.ft_gt_percent >= 0.0 + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/info.json b/yc2_univl/info.json new file mode 100644 index 0000000000000000000000000000000000000000..b0ef7913dc5040eadde57551f13d9f8da312bcff --- /dev/null +++ b/yc2_univl/info.json @@ -0,0 +1 @@ +{"best": {"opt": {"cfg_path": "cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/yc2/captiondata/yc2_train.json"], "invalid_video_json": [], "val_caption_file": "data/yc2/captiondata/yc2_val.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/yc2/captiondata/yc2_val.json"], "gt_file_for_para_eval": ["data/yc2/captiondata/para/para_yc2_val.json"], "dict_file": "data/howto/vocabulary_howto_rate2_yc2.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["UniVL"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": 
null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 14538, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 200, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "UniVL", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 25, "window_size": 3, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, 
"ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 20, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "test": false, "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_yc2.json", "vocab_size_val": 14538, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 15996, "epoch": 11, "best_val_score": 0.5868440997381064, "result_json_path": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/prediction/num457_epoch11.json", "avg_proposal_num": -1, "Precision": 0.4513424333993264, "Recall": 0.30795469953703025}, "last": {"opt": {"cfg_path": "cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/yc2/captiondata/yc2_train.json"], "invalid_video_json": [], "val_caption_file": "data/yc2/captiondata/yc2_val.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/yc2/captiondata/yc2_val.json"], "gt_file_for_para_eval": ["data/yc2/captiondata/para/para_yc2_val.json"], "dict_file": "data/howto/vocabulary_howto_rate2_yc2.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["UniVL"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 14538, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 200, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, 
"prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "UniVL", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 25, "window_size": 3, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 20, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "test": false, "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_yc2.json", "vocab_size_val": 14538, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 26660, "epoch": 19, "best_val_score": 0.5868440997381064}, "history": 
{"val_result_history": {"0": {"eval_score": {"Bleu_1": 0.16894357888730638, "Bleu_2": 0.09902176620134434, "Bleu_3": 0.05312286436412136, "Bleu_4": 0.026212861867102137, "METEOR": 0.0791142699299577, "ROUGE_L": 0.15563765109454591, "CIDEr": 0.4087091055845523, "Recall": 0.1991554685892762, "Precision": 0.40083793546594454, "soda_c": 0.05642652494419026, "para_Bleu_1": 0.28013834967939705, "para_Bleu_2": 0.16393959632782257, "para_Bleu_3": 0.09809744775628881, "para_Bleu_4": 0.060378126412557326, "para_METEOR": 0.1286956339033507, "para_ROUGE_L": 0.29903071052996405, "para_CIDEr": 0.14675303603221324, "avg_proposal_number": -1}}, "1": {"eval_score": {"Bleu_1": 0.18247710374533507, "Bleu_2": 0.10433126216854799, "Bleu_3": 0.05471515540980739, "Bleu_4": 0.025315544998990337, "METEOR": 0.08392673175891194, "ROUGE_L": 0.16810710582244187, "CIDEr": 0.48711946137609907, "Recall": 0.23104975652842194, "Precision": 0.4442690424090867, "soda_c": 0.06454827356060923, "para_Bleu_1": 0.27953804293947354, "para_Bleu_2": 0.1635778619591909, "para_Bleu_3": 0.09761782578266559, "para_Bleu_4": 0.060085255296605154, "para_METEOR": 0.13134445752685775, "para_ROUGE_L": 0.3040652157082556, "para_CIDEr": 0.15701615141849948, "avg_proposal_number": -1}}, "2": {"eval_score": {"Bleu_1": 0.18812761655735627, "Bleu_2": 0.11394688266117041, "Bleu_3": 0.06350983100569632, "Bleu_4": 0.03295035253718016, "METEOR": 0.08673497362280043, "ROUGE_L": 0.17099683701262633, "CIDEr": 0.534654554166069, "Recall": 0.2545535313519452, "Precision": 0.4357073390990242, "soda_c": 0.06940030844072555, "para_Bleu_1": 0.31911536052560924, "para_Bleu_2": 0.19074275606485158, "para_Bleu_3": 0.11503629156908896, "para_Bleu_4": 0.07096292455051724, "para_METEOR": 0.14141970569772275, "para_ROUGE_L": 0.3133292457236414, "para_CIDEr": 0.18756071216976763, "avg_proposal_number": -1}}, "3": {"eval_score": {"Bleu_1": 0.19536023703614988, "Bleu_2": 0.11676341716851109, "Bleu_3": 0.06337153157323498, "Bleu_4": 0.031788948303475714, "METEOR": 0.09287502887069582, "ROUGE_L": 0.18168372139225142, "CIDEr": 0.5345089450528974, "Recall": 0.26186565000159123, "Precision": 0.4578470702650138, "soda_c": 0.06891495599002981, "para_Bleu_1": 0.3645537642333956, "para_Bleu_2": 0.21504928179111618, "para_Bleu_3": 0.1297486406737134, "para_Bleu_4": 0.08010111193897063, "para_METEOR": 0.1518569517959942, "para_ROUGE_L": 0.3241825281759821, "para_CIDEr": 0.22211083978975357, "avg_proposal_number": -1}}, "4": {"eval_score": {"Bleu_1": 0.19366491706119263, "Bleu_2": 0.1161802397372496, "Bleu_3": 0.06381908710297783, "Bleu_4": 0.0310996008751752, "METEOR": 0.0900086447067842, "ROUGE_L": 0.1772625018945245, "CIDEr": 0.5329339889166991, "Recall": 0.27822837264850414, "Precision": 0.4414053002674447, "soda_c": 0.0725148309247326, "para_Bleu_1": 0.36779729697992286, "para_Bleu_2": 0.2189609464261768, "para_Bleu_3": 0.13170237886801614, "para_Bleu_4": 0.08102932652379062, "para_METEOR": 0.15287168689015676, "para_ROUGE_L": 0.32609559286330886, "para_CIDEr": 0.24981796796266917, "avg_proposal_number": -1}}, "5": {"eval_score": {"Bleu_1": 0.19874944106127662, "Bleu_2": 0.12266046915797622, "Bleu_3": 0.07150852984916518, "Bleu_4": 0.036185181004552064, "METEOR": 0.09274687098087099, "ROUGE_L": 0.18413336093424784, "CIDEr": 0.5727051685734265, "Recall": 0.259037909270404, "Precision": 0.451289465457956, "soda_c": 0.07263494732248185, "para_Bleu_1": 0.32307562783294125, "para_Bleu_2": 0.1944214796418441, "para_Bleu_3": 0.11901149393254483, "para_Bleu_4": 0.07454555120453704, 
"para_METEOR": 0.14324209261218024, "para_ROUGE_L": 0.31918573126228, "para_CIDEr": 0.23096832321460165}}, "6": {"eval_score": {"Bleu_1": 0.2003309018825777, "Bleu_2": 0.1225756065112458, "Bleu_3": 0.06724461390362559, "Bleu_4": 0.033684328156599955, "METEOR": 0.0938288297360794, "ROUGE_L": 0.1832565856913202, "CIDEr": 0.5805494889367487, "Recall": 0.28578288505804933, "Precision": 0.4570872842207636, "soda_c": 0.07457933387713374, "para_Bleu_1": 0.3713316702717572, "para_Bleu_2": 0.22391267992808692, "para_Bleu_3": 0.1360620228892395, "para_Bleu_4": 0.08475146307949002, "para_METEOR": 0.15553928732702577, "para_ROUGE_L": 0.3279787647771023, "para_CIDEr": 0.24807495620487915, "avg_proposal_number": -1}}, "7": {"eval_score": {"Bleu_1": 0.19584871429233122, "Bleu_2": 0.1203954133477019, "Bleu_3": 0.06765236989260215, "Bleu_4": 0.03515047236439923, "METEOR": 0.09347581038898298, "ROUGE_L": 0.18336361365161372, "CIDEr": 0.5642570328531701, "Recall": 0.287053410514844, "Precision": 0.4506790316418327, "soda_c": 0.07315525040409161, "para_Bleu_1": 0.39595219023577966, "para_Bleu_2": 0.23717913606151478, "para_Bleu_3": 0.14480681642134902, "para_Bleu_4": 0.0901695364250172, "para_METEOR": 0.16127903027678414, "para_ROUGE_L": 0.3324403291093838, "para_CIDEr": 0.23804687234043756, "avg_proposal_number": -1}}, "8": {"eval_score": {"Bleu_1": 0.19696025394358163, "Bleu_2": 0.12042554867022627, "Bleu_3": 0.06805715701089529, "Bleu_4": 0.034063345644385214, "METEOR": 0.09208296372249718, "ROUGE_L": 0.1803782633150628, "CIDEr": 0.5812603125344058, "Recall": 0.29169024735901117, "Precision": 0.44299129936438486, "soda_c": 0.07606608300691252, "para_Bleu_1": 0.383549187276652, "para_Bleu_2": 0.23192713278728125, "para_Bleu_3": 0.14217181061136971, "para_Bleu_4": 0.0892715976218228, "para_METEOR": 0.16074434603101373, "para_ROUGE_L": 0.3336567463040183, "para_CIDEr": 0.2859809872200661, "avg_proposal_number": -1}}, "9": {"eval_score": {"Bleu_1": 0.20446290018298774, "Bleu_2": 0.12418412895577716, "Bleu_3": 0.06899010124646034, "Bleu_4": 0.03428116460131532, "METEOR": 0.09595521703655657, "ROUGE_L": 0.1876517650928566, "CIDEr": 0.5887832993219201, "Recall": 0.3017153873964599, "Precision": 0.4588439095550697, "soda_c": 0.07875391677883807, "para_Bleu_1": 0.3953706124668704, "para_Bleu_2": 0.24043007714841402, "para_Bleu_3": 0.14833197751929023, "para_Bleu_4": 0.09386644902900565, "para_METEOR": 0.16476396966168239, "para_ROUGE_L": 0.33760319454244797, "para_CIDEr": 0.31194480042956774, "avg_proposal_number": -1}}, "10": {"eval_score": {"Bleu_1": 0.19267153393038786, "Bleu_2": 0.11732781330402656, "Bleu_3": 0.06746115616325608, "Bleu_4": 0.03425583839334337, "METEOR": 0.08963300348041837, "ROUGE_L": 0.17480207136309905, "CIDEr": 0.575137603362526, "Recall": 0.30432682743951917, "Precision": 0.4353044354138446, "soda_c": 0.07762847290423684, "para_Bleu_1": 0.393384019586376, "para_Bleu_2": 0.23835405770332685, "para_Bleu_3": 0.14545808678454117, "para_Bleu_4": 0.09085202435904723, "para_METEOR": 0.16354570345255123, "para_ROUGE_L": 0.3343729651839732, "para_CIDEr": 0.27098453497923136}}, "11": {"eval_score": {"Bleu_1": 0.1989422607268001, "Bleu_2": 0.12223038556953512, "Bleu_3": 0.06835990671747892, "Bleu_4": 0.03486159828438583, "METEOR": 0.09408978838449876, "ROUGE_L": 0.18200142867223945, "CIDEr": 0.593480700759431, "Recall": 0.30795469953703025, "Precision": 0.4513424333993264, "soda_c": 0.0796861065455984, "para_Bleu_1": 0.39594509057043764, "para_Bleu_2": 0.24087109399513515, "para_Bleu_3": 
0.14790262814870953, "para_Bleu_4": 0.09321042711819619, "para_METEOR": 0.1655617051143519, "para_ROUGE_L": 0.3391051008488012, "para_CIDEr": 0.32807196750555834, "avg_proposal_number": -1}}, "12": {"eval_score": {"Bleu_1": 0.19294534256446427, "Bleu_2": 0.11789730285267924, "Bleu_3": 0.06601509377472357, "Bleu_4": 0.03274421971508606, "METEOR": 0.0906445074413136, "ROUGE_L": 0.17678145420382357, "CIDEr": 0.5750907875125135, "Recall": 0.3073352674556176, "Precision": 0.4434536834427428, "soda_c": 0.07896521325127955, "para_Bleu_1": 0.39483511792471604, "para_Bleu_2": 0.23988438429479647, "para_Bleu_3": 0.1464330354033768, "para_Bleu_4": 0.09122283851671699, "para_METEOR": 0.16480200992253577, "para_ROUGE_L": 0.33317486176302236, "para_CIDEr": 0.29080350784714515}}, "13": {"eval_score": {"Bleu_1": 0.1916652028982354, "Bleu_2": 0.11864819375256218, "Bleu_3": 0.06801290454817709, "Bleu_4": 0.03421778123301331, "METEOR": 0.08890100804282676, "ROUGE_L": 0.17229926562968575, "CIDEr": 0.5719694906113042, "Recall": 0.3115151404333572, "Precision": 0.42734448265082836, "soda_c": 0.07979305036983636, "para_Bleu_1": 0.3972508455506424, "para_Bleu_2": 0.24317507500304622, "para_Bleu_3": 0.1497047997976745, "para_Bleu_4": 0.09437727320664267, "para_METEOR": 0.16651343432042678, "para_ROUGE_L": 0.33875534436877147, "para_CIDEr": 0.29220356232363026}}, "14": {"eval_score": {"Bleu_1": 0.19012877786294885, "Bleu_2": 0.11743680046097797, "Bleu_3": 0.06623934110461578, "Bleu_4": 0.03314975306654321, "METEOR": 0.08857227272587216, "ROUGE_L": 0.17208518718096077, "CIDEr": 0.5689998070546577, "Recall": 0.3090681299310951, "Precision": 0.43095498593310433, "soda_c": 0.08081534748318767, "para_Bleu_1": 0.3949292262433903, "para_Bleu_2": 0.24183495416706074, "para_Bleu_3": 0.1493168425692173, "para_Bleu_4": 0.0941904023418332, "para_METEOR": 0.16661877157717606, "para_ROUGE_L": 0.3391544295873436, "para_CIDEr": 0.3057631644012313}}, "15": {"eval_score": {"Bleu_1": 0.1927355202990476, "Bleu_2": 0.11755729236198051, "Bleu_3": 0.06532950485231373, "Bleu_4": 0.0318670348131602, "METEOR": 0.08966953019840175, "ROUGE_L": 0.17549405824640266, "CIDEr": 0.5708533801009449, "Recall": 0.31055728552993345, "Precision": 0.4412863394810881, "soda_c": 0.08079399116249976, "para_Bleu_1": 0.3847850395827542, "para_Bleu_2": 0.23591168028694995, "para_Bleu_3": 0.14500000021146267, "para_Bleu_4": 0.09097906463153684, "para_METEOR": 0.1633729521776342, "para_ROUGE_L": 0.33764324525807, "para_CIDEr": 0.3225522700715415}}, "16": {"eval_score": {"Bleu_1": 0.1905629005997804, "Bleu_2": 0.11689699082903934, "Bleu_3": 0.06544029555928756, "Bleu_4": 0.03330988693345351, "METEOR": 0.08938496175202132, "ROUGE_L": 0.17298359351524648, "CIDEr": 0.5732307929342625, "Recall": 0.309604513071417, "Precision": 0.43046524955715343, "soda_c": 0.08056479007503722, "para_Bleu_1": 0.3975304274857351, "para_Bleu_2": 0.24253918136446623, "para_Bleu_3": 0.14848895422464012, "para_Bleu_4": 0.09337330751749118, "para_METEOR": 0.16677196164785574, "para_ROUGE_L": 0.33750187221117683, "para_CIDEr": 0.31278894258081524}}, "17": {"eval_score": {"Bleu_1": 0.19099469488969467, "Bleu_2": 0.11646897839764006, "Bleu_3": 0.06451308365995856, "Bleu_4": 0.032200079484133, "METEOR": 0.08912416771202449, "ROUGE_L": 0.1730757893125124, "CIDEr": 0.5693051160396969, "Recall": 0.3097042977992106, "Precision": 0.43274547601681085, "soda_c": 0.08084297498321232, "para_Bleu_1": 0.3924031546442418, "para_Bleu_2": 0.23911474626028398, "para_Bleu_3": 0.14600811918196227, 
"para_Bleu_4": 0.09107950853175292, "para_METEOR": 0.16594454181978452, "para_ROUGE_L": 0.33729101832099057, "para_CIDEr": 0.30892642009784}}, "18": {"eval_score": {"Bleu_1": 0.19191750615066444, "Bleu_2": 0.11783589874301872, "Bleu_3": 0.06597231596326529, "Bleu_4": 0.03167603834812624, "METEOR": 0.08996609888818348, "ROUGE_L": 0.1746391859525846, "CIDEr": 0.5689023016363987, "Recall": 0.31503357525649683, "Precision": 0.4376628112951966, "soda_c": 0.08097707611185051, "para_Bleu_1": 0.3977375551078834, "para_Bleu_2": 0.24323062675170298, "para_Bleu_3": 0.1488548587270082, "para_Bleu_4": 0.09292110149283073, "para_METEOR": 0.16716298804356167, "para_ROUGE_L": 0.33781551083855066, "para_CIDEr": 0.31014493696748857}}, "19": {"eval_score": {"Bleu_1": 0.1908811984292725, "Bleu_2": 0.11664270449592412, "Bleu_3": 0.06546844271584715, "Bleu_4": 0.03266470081303028, "METEOR": 0.08981101020496235, "ROUGE_L": 0.17382953846907112, "CIDEr": 0.5716745559959934, "Recall": 0.31292035599338697, "Precision": 0.4345220728699943, "soda_c": 0.08127095018359767, "para_Bleu_1": 0.40170065588267356, "para_Bleu_2": 0.2447870245859959, "para_Bleu_3": 0.14990588787772124, "para_Bleu_4": 0.09419227635900729, "para_METEOR": 0.16780671784283924, "para_ROUGE_L": 0.33845945539662686, "para_CIDEr": 0.3198675630646056}}}, "loss_history": {"133": {"loss_ce": 0.336, "loss_counter": 0.129, "loss_bbox": 0.039, "loss_giou": 0.368, "loss_self_iou": 0.028, "cardinality_error": 7.797, "loss_ce_0": 0.337, "loss_counter_0": 0.13, "loss_bbox_0": 0.041, "loss_giou_0": 0.381, "loss_self_iou_0": 0.03, "cardinality_error_0": 7.797, "loss_caption_0": 2.755, "loss_caption": 2.681, "total_loss": 15.341}, "266": {"loss_ce": 0.324, "loss_counter": 0.129, "loss_bbox": 0.036, "loss_giou": 0.369, "loss_self_iou": 0.018, "cardinality_error": 7.812, "loss_ce_0": 0.341, "loss_counter_0": 0.132, "loss_bbox_0": 0.039, "loss_giou_0": 0.38, "loss_self_iou_0": 0.019, "cardinality_error_0": 7.812, "loss_caption_0": 2.803, "loss_caption": 2.638, "total_loss": 15.341}, "399": {"loss_ce": 0.312, "loss_counter": 0.13, "loss_bbox": 0.039, "loss_giou": 0.375, "loss_self_iou": 0.02, "cardinality_error": 7.835, "loss_ce_0": 0.324, "loss_counter_0": 0.132, "loss_bbox_0": 0.043, "loss_giou_0": 0.395, "loss_self_iou_0": 0.021, "cardinality_error_0": 7.835, "loss_caption_0": 2.81, "loss_caption": 2.676, "total_loss": 15.459}, "532": {"loss_ce": 0.307, "loss_counter": 0.133, "loss_bbox": 0.044, "loss_giou": 0.394, "loss_self_iou": 0.02, "cardinality_error": 7.902, "loss_ce_0": 0.319, "loss_counter_0": 0.133, "loss_bbox_0": 0.05, "loss_giou_0": 0.421, "loss_self_iou_0": 0.026, "cardinality_error_0": 7.902, "loss_caption_0": 2.817, "loss_caption": 2.654, "total_loss": 15.588}, "665": {"loss_ce": 0.312, "loss_counter": 0.135, "loss_bbox": 0.034, "loss_giou": 0.345, "loss_self_iou": 0.017, "cardinality_error": 7.805, "loss_ce_0": 0.319, "loss_counter_0": 0.131, "loss_bbox_0": 0.038, "loss_giou_0": 0.372, "loss_self_iou_0": 0.019, "cardinality_error_0": 7.805, "loss_caption_0": 2.758, "loss_caption": 2.635, "total_loss": 15.049}, "798": {"loss_ce": 0.321, "loss_counter": 0.125, "loss_bbox": 0.03, "loss_giou": 0.319, "loss_self_iou": 0.015, "cardinality_error": 7.774, "loss_ce_0": 0.331, "loss_counter_0": 0.124, "loss_bbox_0": 0.032, "loss_giou_0": 0.344, "loss_self_iou_0": 0.015, "cardinality_error_0": 7.774, "loss_caption_0": 2.66, "loss_caption": 2.559, "total_loss": 14.519}, "931": {"loss_ce": 0.327, "loss_counter": 0.122, "loss_bbox": 0.027, "loss_giou": 0.306, 
"loss_self_iou": 0.011, "cardinality_error": 7.865, "loss_ce_0": 0.346, "loss_counter_0": 0.123, "loss_bbox_0": 0.029, "loss_giou_0": 0.327, "loss_self_iou_0": 0.012, "cardinality_error_0": 7.865, "loss_caption_0": 2.54, "loss_caption": 2.468, "total_loss": 14.017}, "1064": {"loss_ce": 0.331, "loss_counter": 0.121, "loss_bbox": 0.027, "loss_giou": 0.292, "loss_self_iou": 0.01, "cardinality_error": 7.579, "loss_ce_0": 0.345, "loss_counter_0": 0.127, "loss_bbox_0": 0.028, "loss_giou_0": 0.311, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.579, "loss_caption_0": 2.639, "loss_caption": 2.626, "total_loss": 14.419}, "1197": {"loss_ce": 0.325, "loss_counter": 0.118, "loss_bbox": 0.026, "loss_giou": 0.296, "loss_self_iou": 0.011, "cardinality_error": 7.241, "loss_ce_0": 0.339, "loss_counter_0": 0.121, "loss_bbox_0": 0.028, "loss_giou_0": 0.317, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.241, "loss_caption_0": 2.501, "loss_caption": 2.496, "total_loss": 13.892}, "1330": {"loss_ce": 0.327, "loss_counter": 0.126, "loss_bbox": 0.026, "loss_giou": 0.304, "loss_self_iou": 0.011, "cardinality_error": 7.94, "loss_ce_0": 0.334, "loss_counter_0": 0.127, "loss_bbox_0": 0.029, "loss_giou_0": 0.332, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.94, "loss_caption_0": 2.635, "loss_caption": 2.619, "total_loss": 14.504}, "1463": {"loss_ce": 0.322, "loss_counter": 0.128, "loss_bbox": 0.026, "loss_giou": 0.301, "loss_self_iou": 0.011, "cardinality_error": 7.699, "loss_ce_0": 0.335, "loss_counter_0": 0.129, "loss_bbox_0": 0.026, "loss_giou_0": 0.316, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.699, "loss_caption_0": 2.448, "loss_caption": 2.462, "total_loss": 13.729}, "1596": {"loss_ce": 0.311, "loss_counter": 0.126, "loss_bbox": 0.022, "loss_giou": 0.284, "loss_self_iou": 0.01, "cardinality_error": 8.233, "loss_ce_0": 0.322, "loss_counter_0": 0.123, "loss_bbox_0": 0.024, "loss_giou_0": 0.31, "loss_self_iou_0": 0.01, "cardinality_error_0": 8.233, "loss_caption_0": 2.348, "loss_caption": 2.348, "total_loss": 13.16}, "1729": {"loss_ce": 0.311, "loss_counter": 0.124, "loss_bbox": 0.023, "loss_giou": 0.273, "loss_self_iou": 0.01, "cardinality_error": 7.632, "loss_ce_0": 0.32, "loss_counter_0": 0.124, "loss_bbox_0": 0.026, "loss_giou_0": 0.307, "loss_self_iou_0": 0.012, "cardinality_error_0": 7.632, "loss_caption_0": 2.363, "loss_caption": 2.353, "total_loss": 13.14}, "1862": {"loss_ce": 0.316, "loss_counter": 0.12, "loss_bbox": 0.023, "loss_giou": 0.268, "loss_self_iou": 0.01, "cardinality_error": 7.609, "loss_ce_0": 0.32, "loss_counter_0": 0.119, "loss_bbox_0": 0.025, "loss_giou_0": 0.29, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.609, "loss_caption_0": 2.439, "loss_caption": 2.419, "total_loss": 13.343}, "1995": {"loss_ce": 0.314, "loss_counter": 0.122, "loss_bbox": 0.022, "loss_giou": 0.281, "loss_self_iou": 0.009, "cardinality_error": 7.541, "loss_ce_0": 0.322, "loss_counter_0": 0.122, "loss_bbox_0": 0.025, "loss_giou_0": 0.309, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.541, "loss_caption_0": 2.503, "loss_caption": 2.503, "total_loss": 13.766}, "2128": {"loss_ce": 0.316, "loss_counter": 0.126, "loss_bbox": 0.024, "loss_giou": 0.284, "loss_self_iou": 0.009, "cardinality_error": 7.789, "loss_ce_0": 0.324, "loss_counter_0": 0.125, "loss_bbox_0": 0.026, "loss_giou_0": 0.301, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.789, "loss_caption_0": 2.5, "loss_caption": 2.493, "total_loss": 13.73}, "2261": {"loss_ce": 0.31, "loss_counter": 0.122, "loss_bbox": 0.023, "loss_giou": 
0.285, "loss_self_iou": 0.012, "cardinality_error": 7.902, "loss_ce_0": 0.316, "loss_counter_0": 0.12, "loss_bbox_0": 0.025, "loss_giou_0": 0.304, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.902, "loss_caption_0": 2.425, "loss_caption": 2.424, "total_loss": 13.426}, "2394": {"loss_ce": 0.315, "loss_counter": 0.126, "loss_bbox": 0.025, "loss_giou": 0.29, "loss_self_iou": 0.011, "cardinality_error": 7.534, "loss_ce_0": 0.323, "loss_counter_0": 0.125, "loss_bbox_0": 0.026, "loss_giou_0": 0.308, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.534, "loss_caption_0": 2.439, "loss_caption": 2.435, "total_loss": 13.54}, "2527": {"loss_ce": 0.313, "loss_counter": 0.125, "loss_bbox": 0.023, "loss_giou": 0.276, "loss_self_iou": 0.009, "cardinality_error": 7.647, "loss_ce_0": 0.319, "loss_counter_0": 0.123, "loss_bbox_0": 0.025, "loss_giou_0": 0.296, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.647, "loss_caption_0": 2.454, "loss_caption": 2.455, "total_loss": 13.492}, "2660": {"loss_ce": 0.313, "loss_counter": 0.131, "loss_bbox": 0.023, "loss_giou": 0.273, "loss_self_iou": 0.01, "cardinality_error": 8.0, "loss_ce_0": 0.317, "loss_counter_0": 0.128, "loss_bbox_0": 0.026, "loss_giou_0": 0.294, "loss_self_iou_0": 0.01, "cardinality_error_0": 8.0, "loss_caption_0": 2.464, "loss_caption": 2.451, "total_loss": 13.487}, "2793": {"loss_ce": 0.309, "loss_counter": 0.119, "loss_bbox": 0.021, "loss_giou": 0.26, "loss_self_iou": 0.01, "cardinality_error": 7.556, "loss_ce_0": 0.312, "loss_counter_0": 0.118, "loss_bbox_0": 0.024, "loss_giou_0": 0.285, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.556, "loss_caption_0": 2.27, "loss_caption": 2.276, "total_loss": 12.632}, "2926": {"loss_ce": 0.313, "loss_counter": 0.121, "loss_bbox": 0.023, "loss_giou": 0.266, "loss_self_iou": 0.008, "cardinality_error": 7.444, "loss_ce_0": 0.317, "loss_counter_0": 0.118, "loss_bbox_0": 0.025, "loss_giou_0": 0.287, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.444, "loss_caption_0": 2.276, "loss_caption": 2.291, "total_loss": 12.726}, "3059": {"loss_ce": 0.298, "loss_counter": 0.127, "loss_bbox": 0.02, "loss_giou": 0.272, "loss_self_iou": 0.008, "cardinality_error": 8.135, "loss_ce_0": 0.302, "loss_counter_0": 0.125, "loss_bbox_0": 0.023, "loss_giou_0": 0.296, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.135, "loss_caption_0": 2.364, "loss_caption": 2.364, "total_loss": 13.057}, "3192": {"loss_ce": 0.301, "loss_counter": 0.122, "loss_bbox": 0.022, "loss_giou": 0.266, "loss_self_iou": 0.008, "cardinality_error": 7.699, "loss_ce_0": 0.306, "loss_counter_0": 0.121, "loss_bbox_0": 0.023, "loss_giou_0": 0.286, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.699, "loss_caption_0": 2.367, "loss_caption": 2.381, "total_loss": 13.038}, "3325": {"loss_ce": 0.3, "loss_counter": 0.123, "loss_bbox": 0.021, "loss_giou": 0.274, "loss_self_iou": 0.009, "cardinality_error": 7.932, "loss_ce_0": 0.3, "loss_counter_0": 0.121, "loss_bbox_0": 0.023, "loss_giou_0": 0.291, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.932, "loss_caption_0": 2.323, "loss_caption": 2.33, "total_loss": 12.887}, "3458": {"loss_ce": 0.31, "loss_counter": 0.124, "loss_bbox": 0.021, "loss_giou": 0.277, "loss_self_iou": 0.01, "cardinality_error": 7.865, "loss_ce_0": 0.31, "loss_counter_0": 0.123, "loss_bbox_0": 0.023, "loss_giou_0": 0.295, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.865, "loss_caption_0": 2.351, "loss_caption": 2.341, "total_loss": 13.038}, "3591": {"loss_ce": 0.306, "loss_counter": 0.114, "loss_bbox": 0.022, 
"loss_giou": 0.263, "loss_self_iou": 0.009, "cardinality_error": 7.586, "loss_ce_0": 0.308, "loss_counter_0": 0.114, "loss_bbox_0": 0.024, "loss_giou_0": 0.285, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.586, "loss_caption_0": 2.222, "loss_caption": 2.223, "total_loss": 12.425}, "3724": {"loss_ce": 0.305, "loss_counter": 0.123, "loss_bbox": 0.023, "loss_giou": 0.265, "loss_self_iou": 0.009, "cardinality_error": 7.624, "loss_ce_0": 0.307, "loss_counter_0": 0.121, "loss_bbox_0": 0.024, "loss_giou_0": 0.279, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.624, "loss_caption_0": 2.38, "loss_caption": 2.368, "total_loss": 13.014}, "3857": {"loss_ce": 0.306, "loss_counter": 0.115, "loss_bbox": 0.021, "loss_giou": 0.264, "loss_self_iou": 0.009, "cardinality_error": 7.489, "loss_ce_0": 0.312, "loss_counter_0": 0.114, "loss_bbox_0": 0.023, "loss_giou_0": 0.279, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.489, "loss_caption_0": 2.343, "loss_caption": 2.344, "total_loss": 12.897}, "3990": {"loss_ce": 0.299, "loss_counter": 0.134, "loss_bbox": 0.02, "loss_giou": 0.268, "loss_self_iou": 0.012, "cardinality_error": 8.301, "loss_ce_0": 0.299, "loss_counter_0": 0.131, "loss_bbox_0": 0.022, "loss_giou_0": 0.289, "loss_self_iou_0": 0.013, "cardinality_error_0": 8.301, "loss_caption_0": 2.327, "loss_caption": 2.346, "total_loss": 12.9}, "4123": {"loss_ce": 0.305, "loss_counter": 0.129, "loss_bbox": 0.021, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.925, "loss_ce_0": 0.307, "loss_counter_0": 0.126, "loss_bbox_0": 0.023, "loss_giou_0": 0.275, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.925, "loss_caption_0": 2.272, "loss_caption": 2.28, "total_loss": 12.579}, "4256": {"loss_ce": 0.308, "loss_counter": 0.121, "loss_bbox": 0.02, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.632, "loss_ce_0": 0.31, "loss_counter_0": 0.12, "loss_bbox_0": 0.022, "loss_giou_0": 0.276, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.632, "loss_caption_0": 2.247, "loss_caption": 2.252, "total_loss": 12.484}, "4389": {"loss_ce": 0.305, "loss_counter": 0.12, "loss_bbox": 0.021, "loss_giou": 0.26, "loss_self_iou": 0.011, "cardinality_error": 7.526, "loss_ce_0": 0.309, "loss_counter_0": 0.119, "loss_bbox_0": 0.022, "loss_giou_0": 0.272, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.526, "loss_caption_0": 2.194, "loss_caption": 2.205, "total_loss": 12.273}, "4522": {"loss_ce": 0.305, "loss_counter": 0.115, "loss_bbox": 0.019, "loss_giou": 0.248, "loss_self_iou": 0.007, "cardinality_error": 7.519, "loss_ce_0": 0.303, "loss_counter_0": 0.113, "loss_bbox_0": 0.021, "loss_giou_0": 0.262, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.519, "loss_caption_0": 2.335, "loss_caption": 2.326, "total_loss": 12.689}, "4655": {"loss_ce": 0.297, "loss_counter": 0.122, "loss_bbox": 0.02, "loss_giou": 0.263, "loss_self_iou": 0.008, "cardinality_error": 7.97, "loss_ce_0": 0.298, "loss_counter_0": 0.121, "loss_bbox_0": 0.022, "loss_giou_0": 0.285, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.97, "loss_caption_0": 2.254, "loss_caption": 2.267, "total_loss": 12.545}, "4788": {"loss_ce": 0.308, "loss_counter": 0.118, "loss_bbox": 0.021, "loss_giou": 0.253, "loss_self_iou": 0.008, "cardinality_error": 7.481, "loss_ce_0": 0.308, "loss_counter_0": 0.118, "loss_bbox_0": 0.022, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.481, "loss_caption_0": 2.208, "loss_caption": 2.195, "total_loss": 12.24}, "4921": {"loss_ce": 0.306, "loss_counter": 0.12, 
"loss_bbox": 0.019, "loss_giou": 0.262, "loss_self_iou": 0.01, "cardinality_error": 7.842, "loss_ce_0": 0.305, "loss_counter_0": 0.119, "loss_bbox_0": 0.021, "loss_giou_0": 0.284, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.842, "loss_caption_0": 2.186, "loss_caption": 2.196, "total_loss": 12.289}, "5054": {"loss_ce": 0.303, "loss_counter": 0.121, "loss_bbox": 0.022, "loss_giou": 0.26, "loss_self_iou": 0.009, "cardinality_error": 7.887, "loss_ce_0": 0.305, "loss_counter_0": 0.12, "loss_bbox_0": 0.023, "loss_giou_0": 0.271, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.887, "loss_caption_0": 2.242, "loss_caption": 2.239, "total_loss": 12.422}, "5187": {"loss_ce": 0.303, "loss_counter": 0.124, "loss_bbox": 0.021, "loss_giou": 0.262, "loss_self_iou": 0.009, "cardinality_error": 7.932, "loss_ce_0": 0.305, "loss_counter_0": 0.123, "loss_bbox_0": 0.022, "loss_giou_0": 0.277, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.932, "loss_caption_0": 2.25, "loss_caption": 2.246, "total_loss": 12.483}, "5320": {"loss_ce": 0.299, "loss_counter": 0.12, "loss_bbox": 0.022, "loss_giou": 0.26, "loss_self_iou": 0.006, "cardinality_error": 7.729, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.024, "loss_giou_0": 0.279, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.729, "loss_caption_0": 2.287, "loss_caption": 2.298, "total_loss": 12.64}, "5453": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.022, "loss_giou": 0.25, "loss_self_iou": 0.011, "cardinality_error": 7.519, "loss_ce_0": 0.298, "loss_counter_0": 0.113, "loss_bbox_0": 0.023, "loss_giou_0": 0.269, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.519, "loss_caption_0": 2.175, "loss_caption": 2.176, "total_loss": 12.088}, "5586": {"loss_ce": 0.294, "loss_counter": 0.12, "loss_bbox": 0.018, "loss_giou": 0.252, "loss_self_iou": 0.007, "cardinality_error": 7.662, "loss_ce_0": 0.292, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.274, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.662, "loss_caption_0": 2.16, "loss_caption": 2.132, "total_loss": 11.979}, "5719": {"loss_ce": 0.305, "loss_counter": 0.13, "loss_bbox": 0.02, "loss_giou": 0.255, "loss_self_iou": 0.008, "cardinality_error": 8.451, "loss_ce_0": 0.302, "loss_counter_0": 0.127, "loss_bbox_0": 0.021, "loss_giou_0": 0.273, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.451, "loss_caption_0": 2.166, "loss_caption": 2.164, "total_loss": 12.113}, "5852": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.246, "loss_self_iou": 0.007, "cardinality_error": 7.835, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.02, "loss_giou_0": 0.267, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.835, "loss_caption_0": 2.122, "loss_caption": 2.111, "total_loss": 11.841}, "5985": {"loss_ce": 0.304, "loss_counter": 0.122, "loss_bbox": 0.02, "loss_giou": 0.243, "loss_self_iou": 0.009, "cardinality_error": 7.474, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.022, "loss_giou_0": 0.263, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.474, "loss_caption_0": 2.149, "loss_caption": 2.14, "total_loss": 11.926}, "6118": {"loss_ce": 0.3, "loss_counter": 0.113, "loss_bbox": 0.018, "loss_giou": 0.241, "loss_self_iou": 0.008, "cardinality_error": 7.639, "loss_ce_0": 0.302, "loss_counter_0": 0.112, "loss_bbox_0": 0.019, "loss_giou_0": 0.259, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.639, "loss_caption_0": 2.235, "loss_caption": 2.215, "total_loss": 12.218}, "6251": {"loss_ce": 0.301, "loss_counter": 
0.125, "loss_bbox": 0.02, "loss_giou": 0.251, "loss_self_iou": 0.007, "cardinality_error": 7.857, "loss_ce_0": 0.301, "loss_counter_0": 0.125, "loss_bbox_0": 0.022, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.857, "loss_caption_0": 2.235, "loss_caption": 2.226, "total_loss": 12.328}, "6384": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.02, "loss_giou": 0.246, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.301, "loss_counter_0": 0.124, "loss_bbox_0": 0.021, "loss_giou_0": 0.265, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.82, "loss_caption_0": 2.208, "loss_caption": 2.183, "total_loss": 12.157}, "6517": {"loss_ce": 0.297, "loss_counter": 0.12, "loss_bbox": 0.02, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.872, "loss_ce_0": 0.295, "loss_counter_0": 0.118, "loss_bbox_0": 0.022, "loss_giou_0": 0.271, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.872, "loss_caption_0": 2.135, "loss_caption": 2.155, "total_loss": 11.99}, "6650": {"loss_ce": 0.297, "loss_counter": 0.112, "loss_bbox": 0.021, "loss_giou": 0.244, "loss_self_iou": 0.008, "cardinality_error": 7.398, "loss_ce_0": 0.297, "loss_counter_0": 0.112, "loss_bbox_0": 0.023, "loss_giou_0": 0.26, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.398, "loss_caption_0": 2.205, "loss_caption": 2.202, "total_loss": 12.127}, "6783": {"loss_ce": 0.29, "loss_counter": 0.117, "loss_bbox": 0.019, "loss_giou": 0.24, "loss_self_iou": 0.007, "cardinality_error": 7.586, "loss_ce_0": 0.29, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.257, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.586, "loss_caption_0": 2.02, "loss_caption": 2.014, "total_loss": 11.332}, "6916": {"loss_ce": 0.301, "loss_counter": 0.118, "loss_bbox": 0.021, "loss_giou": 0.249, "loss_self_iou": 0.008, "cardinality_error": 7.519, "loss_ce_0": 0.302, "loss_counter_0": 0.116, "loss_bbox_0": 0.023, "loss_giou_0": 0.264, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.519, "loss_caption_0": 2.118, "loss_caption": 2.101, "total_loss": 11.817}, "7049": {"loss_ce": 0.294, "loss_counter": 0.119, "loss_bbox": 0.019, "loss_giou": 0.25, "loss_self_iou": 0.007, "cardinality_error": 7.699, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.02, "loss_giou_0": 0.265, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.699, "loss_caption_0": 2.105, "loss_caption": 2.111, "total_loss": 11.78}, "7182": {"loss_ce": 0.29, "loss_counter": 0.115, "loss_bbox": 0.021, "loss_giou": 0.242, "loss_self_iou": 0.008, "cardinality_error": 7.594, "loss_ce_0": 0.288, "loss_counter_0": 0.115, "loss_bbox_0": 0.022, "loss_giou_0": 0.257, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.594, "loss_caption_0": 2.194, "loss_caption": 2.195, "total_loss": 12.045}, "7315": {"loss_ce": 0.29, "loss_counter": 0.123, "loss_bbox": 0.02, "loss_giou": 0.254, "loss_self_iou": 0.009, "cardinality_error": 8.301, "loss_ce_0": 0.291, "loss_counter_0": 0.123, "loss_bbox_0": 0.02, "loss_giou_0": 0.268, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.301, "loss_caption_0": 2.096, "loss_caption": 2.09, "total_loss": 11.741}, "7448": {"loss_ce": 0.296, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.234, "loss_self_iou": 0.006, "cardinality_error": 7.677, "loss_ce_0": 0.292, "loss_counter_0": 0.12, "loss_bbox_0": 0.02, "loss_giou_0": 0.251, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.677, "loss_caption_0": 2.076, "loss_caption": 2.063, "total_loss": 11.513}, "7581": {"loss_ce": 0.298, 
"loss_counter": 0.116, "loss_bbox": 0.019, "loss_giou": 0.238, "loss_self_iou": 0.008, "cardinality_error": 7.534, "loss_ce_0": 0.295, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.253, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.534, "loss_caption_0": 2.114, "loss_caption": 2.112, "total_loss": 11.718}, "7714": {"loss_ce": 0.295, "loss_counter": 0.117, "loss_bbox": 0.018, "loss_giou": 0.235, "loss_self_iou": 0.008, "cardinality_error": 7.677, "loss_ce_0": 0.291, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.253, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.677, "loss_caption_0": 2.167, "loss_caption": 2.179, "total_loss": 11.932}, "7847": {"loss_ce": 0.293, "loss_counter": 0.118, "loss_bbox": 0.019, "loss_giou": 0.252, "loss_self_iou": 0.009, "cardinality_error": 8.053, "loss_ce_0": 0.289, "loss_counter_0": 0.117, "loss_bbox_0": 0.021, "loss_giou_0": 0.269, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.053, "loss_caption_0": 2.106, "loss_caption": 2.115, "total_loss": 11.804}, "7980": {"loss_ce": 0.3, "loss_counter": 0.118, "loss_bbox": 0.019, "loss_giou": 0.249, "loss_self_iou": 0.007, "cardinality_error": 7.902, "loss_ce_0": 0.295, "loss_counter_0": 0.117, "loss_bbox_0": 0.021, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.902, "loss_caption_0": 2.151, "loss_caption": 2.153, "total_loss": 11.979}, "8113": {"loss_ce": 0.297, "loss_counter": 0.114, "loss_bbox": 0.019, "loss_giou": 0.236, "loss_self_iou": 0.008, "cardinality_error": 7.617, "loss_ce_0": 0.295, "loss_counter_0": 0.112, "loss_bbox_0": 0.021, "loss_giou_0": 0.257, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.617, "loss_caption_0": 2.036, "loss_caption": 2.044, "total_loss": 11.427}, "8246": {"loss_ce": 0.286, "loss_counter": 0.119, "loss_bbox": 0.019, "loss_giou": 0.237, "loss_self_iou": 0.006, "cardinality_error": 7.827, "loss_ce_0": 0.283, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.257, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.827, "loss_caption_0": 2.055, "loss_caption": 2.057, "total_loss": 11.458}, "8379": {"loss_ce": 0.29, "loss_counter": 0.118, "loss_bbox": 0.018, "loss_giou": 0.225, "loss_self_iou": 0.005, "cardinality_error": 7.82, "loss_ce_0": 0.286, "loss_counter_0": 0.117, "loss_bbox_0": 0.019, "loss_giou_0": 0.246, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 2.046, "loss_caption": 2.041, "total_loss": 11.331}, "8512": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.228, "loss_self_iou": 0.006, "cardinality_error": 7.654, "loss_ce_0": 0.283, "loss_counter_0": 0.114, "loss_bbox_0": 0.019, "loss_giou_0": 0.245, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.654, "loss_caption_0": 1.991, "loss_caption": 1.997, "total_loss": 11.118}, "8645": {"loss_ce": 0.29, "loss_counter": 0.115, "loss_bbox": 0.02, "loss_giou": 0.251, "loss_self_iou": 0.007, "cardinality_error": 8.068, "loss_ce_0": 0.287, "loss_counter_0": 0.115, "loss_bbox_0": 0.022, "loss_giou_0": 0.265, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.068, "loss_caption_0": 2.094, "loss_caption": 2.097, "total_loss": 11.714}, "8778": {"loss_ce": 0.288, "loss_counter": 0.121, "loss_bbox": 0.019, "loss_giou": 0.24, "loss_self_iou": 0.008, "cardinality_error": 8.008, "loss_ce_0": 0.286, "loss_counter_0": 0.121, "loss_bbox_0": 0.02, "loss_giou_0": 0.258, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.008, "loss_caption_0": 2.092, "loss_caption": 2.092, "total_loss": 11.63}, "8911": 
{"loss_ce": 0.298, "loss_counter": 0.114, "loss_bbox": 0.019, "loss_giou": 0.235, "loss_self_iou": 0.008, "cardinality_error": 7.338, "loss_ce_0": 0.297, "loss_counter_0": 0.114, "loss_bbox_0": 0.02, "loss_giou_0": 0.248, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.338, "loss_caption_0": 2.051, "loss_caption": 2.054, "total_loss": 11.446}, "9044": {"loss_ce": 0.292, "loss_counter": 0.105, "loss_bbox": 0.02, "loss_giou": 0.227, "loss_self_iou": 0.008, "cardinality_error": 7.226, "loss_ce_0": 0.292, "loss_counter_0": 0.105, "loss_bbox_0": 0.021, "loss_giou_0": 0.243, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.226, "loss_caption_0": 2.08, "loss_caption": 2.084, "total_loss": 11.478}, "9177": {"loss_ce": 0.291, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.254, "loss_self_iou": 0.007, "cardinality_error": 7.977, "loss_ce_0": 0.288, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.275, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.977, "loss_caption_0": 2.046, "loss_caption": 2.031, "total_loss": 11.546}, "9310": {"loss_ce": 0.28, "loss_counter": 0.117, "loss_bbox": 0.018, "loss_giou": 0.236, "loss_self_iou": 0.006, "cardinality_error": 7.97, "loss_ce_0": 0.281, "loss_counter_0": 0.118, "loss_bbox_0": 0.019, "loss_giou_0": 0.252, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.97, "loss_caption_0": 1.986, "loss_caption": 1.995, "total_loss": 11.157}, "9443": {"loss_ce": 0.294, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.226, "loss_self_iou": 0.006, "cardinality_error": 7.617, "loss_ce_0": 0.292, "loss_counter_0": 0.114, "loss_bbox_0": 0.019, "loss_giou_0": 0.239, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.617, "loss_caption_0": 2.065, "loss_caption": 2.061, "total_loss": 11.394}, "9576": {"loss_ce": 0.285, "loss_counter": 0.119, "loss_bbox": 0.02, "loss_giou": 0.231, "loss_self_iou": 0.006, "cardinality_error": 7.917, "loss_ce_0": 0.284, "loss_counter_0": 0.119, "loss_bbox_0": 0.021, "loss_giou_0": 0.252, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.917, "loss_caption_0": 1.977, "loss_caption": 1.974, "total_loss": 11.093}, "9709": {"loss_ce": 0.291, "loss_counter": 0.117, "loss_bbox": 0.016, "loss_giou": 0.224, "loss_self_iou": 0.006, "cardinality_error": 8.098, "loss_ce_0": 0.29, "loss_counter_0": 0.117, "loss_bbox_0": 0.018, "loss_giou_0": 0.242, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.098, "loss_caption_0": 2.051, "loss_caption": 2.063, "total_loss": 11.373}, "9842": {"loss_ce": 0.288, "loss_counter": 0.11, "loss_bbox": 0.018, "loss_giou": 0.242, "loss_self_iou": 0.007, "cardinality_error": 7.662, "loss_ce_0": 0.286, "loss_counter_0": 0.11, "loss_bbox_0": 0.02, "loss_giou_0": 0.262, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.662, "loss_caption_0": 1.939, "loss_caption": 1.953, "total_loss": 11.058}, "9975": {"loss_ce": 0.28, "loss_counter": 0.116, "loss_bbox": 0.017, "loss_giou": 0.238, "loss_self_iou": 0.006, "cardinality_error": 8.233, "loss_ce_0": 0.281, "loss_counter_0": 0.116, "loss_bbox_0": 0.018, "loss_giou_0": 0.255, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.233, "loss_caption_0": 2.024, "loss_caption": 2.026, "total_loss": 11.31}, "10108": {"loss_ce": 0.283, "loss_counter": 0.111, "loss_bbox": 0.018, "loss_giou": 0.232, "loss_self_iou": 0.006, "cardinality_error": 7.466, "loss_ce_0": 0.279, "loss_counter_0": 0.112, "loss_bbox_0": 0.02, "loss_giou_0": 0.246, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.466, "loss_caption_0": 1.878, "loss_caption": 1.882, "total_loss": 
10.667}, "10241": {"loss_ce": 0.285, "loss_counter": 0.119, "loss_bbox": 0.018, "loss_giou": 0.24, "loss_self_iou": 0.007, "cardinality_error": 7.722, "loss_ce_0": 0.282, "loss_counter_0": 0.119, "loss_bbox_0": 0.019, "loss_giou_0": 0.253, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.722, "loss_caption_0": 1.984, "loss_caption": 1.988, "total_loss": 11.165}, "10374": {"loss_ce": 0.292, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.225, "loss_self_iou": 0.007, "cardinality_error": 7.692, "loss_ce_0": 0.285, "loss_counter_0": 0.112, "loss_bbox_0": 0.019, "loss_giou_0": 0.241, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.692, "loss_caption_0": 2.089, "loss_caption": 2.094, "total_loss": 11.498}, "10507": {"loss_ce": 0.287, "loss_counter": 0.113, "loss_bbox": 0.019, "loss_giou": 0.22, "loss_self_iou": 0.007, "cardinality_error": 7.564, "loss_ce_0": 0.283, "loss_counter_0": 0.113, "loss_bbox_0": 0.021, "loss_giou_0": 0.241, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.564, "loss_caption_0": 1.936, "loss_caption": 1.935, "total_loss": 10.84}, "10640": {"loss_ce": 0.281, "loss_counter": 0.115, "loss_bbox": 0.02, "loss_giou": 0.232, "loss_self_iou": 0.008, "cardinality_error": 7.549, "loss_ce_0": 0.278, "loss_counter_0": 0.116, "loss_bbox_0": 0.022, "loss_giou_0": 0.249, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.549, "loss_caption_0": 2.041, "loss_caption": 2.042, "total_loss": 11.323}, "10773": {"loss_ce": 0.279, "loss_counter": 0.114, "loss_bbox": 0.017, "loss_giou": 0.235, "loss_self_iou": 0.006, "cardinality_error": 7.94, "loss_ce_0": 0.278, "loss_counter_0": 0.115, "loss_bbox_0": 0.018, "loss_giou_0": 0.253, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.94, "loss_caption_0": 1.851, "loss_caption": 1.84, "total_loss": 10.561}, "10906": {"loss_ce": 0.279, "loss_counter": 0.109, "loss_bbox": 0.017, "loss_giou": 0.215, "loss_self_iou": 0.006, "cardinality_error": 7.218, "loss_ce_0": 0.278, "loss_counter_0": 0.109, "loss_bbox_0": 0.018, "loss_giou_0": 0.231, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.218, "loss_caption_0": 1.945, "loss_caption": 1.948, "total_loss": 10.791}, "11039": {"loss_ce": 0.288, "loss_counter": 0.108, "loss_bbox": 0.017, "loss_giou": 0.207, "loss_self_iou": 0.006, "cardinality_error": 7.579, "loss_ce_0": 0.283, "loss_counter_0": 0.109, "loss_bbox_0": 0.018, "loss_giou_0": 0.223, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.579, "loss_caption_0": 1.92, "loss_caption": 1.927, "total_loss": 10.664}, "11172": {"loss_ce": 0.28, "loss_counter": 0.11, "loss_bbox": 0.018, "loss_giou": 0.215, "loss_self_iou": 0.006, "cardinality_error": 7.451, "loss_ce_0": 0.279, "loss_counter_0": 0.11, "loss_bbox_0": 0.019, "loss_giou_0": 0.231, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.451, "loss_caption_0": 1.91, "loss_caption": 1.9, "total_loss": 10.635}, "11305": {"loss_ce": 0.278, "loss_counter": 0.125, "loss_bbox": 0.017, "loss_giou": 0.233, "loss_self_iou": 0.006, "cardinality_error": 8.09, "loss_ce_0": 0.276, "loss_counter_0": 0.126, "loss_bbox_0": 0.018, "loss_giou_0": 0.244, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.09, "loss_caption_0": 1.876, "loss_caption": 1.877, "total_loss": 10.648}, "11438": {"loss_ce": 0.273, "loss_counter": 0.113, "loss_bbox": 0.016, "loss_giou": 0.211, "loss_self_iou": 0.005, "cardinality_error": 7.744, "loss_ce_0": 0.269, "loss_counter_0": 0.113, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.744, "loss_caption_0": 1.981, "loss_caption": 
1.968, "total_loss": 10.865}, "11571": {"loss_ce": 0.281, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.225, "loss_self_iou": 0.006, "cardinality_error": 7.699, "loss_ce_0": 0.277, "loss_counter_0": 0.115, "loss_bbox_0": 0.02, "loss_giou_0": 0.243, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.699, "loss_caption_0": 1.833, "loss_caption": 1.846, "total_loss": 10.461}, "11704": {"loss_ce": 0.28, "loss_counter": 0.115, "loss_bbox": 0.017, "loss_giou": 0.21, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.278, "loss_counter_0": 0.116, "loss_bbox_0": 0.017, "loss_giou_0": 0.226, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 1.91, "loss_caption": 1.915, "total_loss": 10.628}, "11837": {"loss_ce": 0.271, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.215, "loss_self_iou": 0.007, "cardinality_error": 8.0, "loss_ce_0": 0.273, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.23, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.0, "loss_caption_0": 1.936, "loss_caption": 1.939, "total_loss": 10.726}, "11970": {"loss_ce": 0.272, "loss_counter": 0.115, "loss_bbox": 0.017, "loss_giou": 0.22, "loss_self_iou": 0.006, "cardinality_error": 8.158, "loss_ce_0": 0.27, "loss_counter_0": 0.116, "loss_bbox_0": 0.018, "loss_giou_0": 0.242, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.158, "loss_caption_0": 1.953, "loss_caption": 1.962, "total_loss": 10.881}, "12103": {"loss_ce": 0.275, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.216, "loss_self_iou": 0.006, "cardinality_error": 8.038, "loss_ce_0": 0.274, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.038, "loss_caption_0": 1.832, "loss_caption": 1.845, "total_loss": 10.35}, "12236": {"loss_ce": 0.272, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.206, "loss_self_iou": 0.005, "cardinality_error": 7.812, "loss_ce_0": 0.266, "loss_counter_0": 0.111, "loss_bbox_0": 0.018, "loss_giou_0": 0.223, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.812, "loss_caption_0": 1.968, "loss_caption": 1.959, "total_loss": 10.757}, "12369": {"loss_ce": 0.273, "loss_counter": 0.118, "loss_bbox": 0.016, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.827, "loss_ce_0": 0.27, "loss_counter_0": 0.118, "loss_bbox_0": 0.017, "loss_giou_0": 0.226, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.827, "loss_caption_0": 1.89, "loss_caption": 1.903, "total_loss": 10.534}, "12502": {"loss_ce": 0.27, "loss_counter": 0.108, "loss_bbox": 0.016, "loss_giou": 0.205, "loss_self_iou": 0.006, "cardinality_error": 7.684, "loss_ce_0": 0.268, "loss_counter_0": 0.108, "loss_bbox_0": 0.017, "loss_giou_0": 0.224, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.684, "loss_caption_0": 1.903, "loss_caption": 1.905, "total_loss": 10.519}, "12635": {"loss_ce": 0.27, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.218, "loss_self_iou": 0.005, "cardinality_error": 7.947, "loss_ce_0": 0.269, "loss_counter_0": 0.112, "loss_bbox_0": 0.016, "loss_giou_0": 0.232, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.947, "loss_caption_0": 1.822, "loss_caption": 1.826, "total_loss": 10.284}, "12768": {"loss_ce": 0.277, "loss_counter": 0.111, "loss_bbox": 0.017, "loss_giou": 0.219, "loss_self_iou": 0.008, "cardinality_error": 7.669, "loss_ce_0": 0.276, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.235, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.669, "loss_caption_0": 1.905, 
"loss_caption": 1.909, "total_loss": 10.662}, "12901": {"loss_ce": 0.269, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.005, "cardinality_error": 7.639, "loss_ce_0": 0.267, "loss_counter_0": 0.108, "loss_bbox_0": 0.017, "loss_giou_0": 0.224, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.639, "loss_caption_0": 1.856, "loss_caption": 1.863, "total_loss": 10.344}, "13034": {"loss_ce": 0.273, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.216, "loss_self_iou": 0.005, "cardinality_error": 7.85, "loss_ce_0": 0.274, "loss_counter_0": 0.113, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.841, "loss_caption": 1.841, "total_loss": 10.356}, "13167": {"loss_ce": 0.275, "loss_counter": 0.109, "loss_bbox": 0.018, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.406, "loss_ce_0": 0.273, "loss_counter_0": 0.109, "loss_bbox_0": 0.019, "loss_giou_0": 0.226, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.406, "loss_caption_0": 1.931, "loss_caption": 1.927, "total_loss": 10.663}, "13300": {"loss_ce": 0.274, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.212, "loss_self_iou": 0.005, "cardinality_error": 7.737, "loss_ce_0": 0.272, "loss_counter_0": 0.113, "loss_bbox_0": 0.019, "loss_giou_0": 0.23, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.737, "loss_caption_0": 1.853, "loss_caption": 1.849, "total_loss": 10.379}, "13433": {"loss_ce": 0.271, "loss_counter": 0.112, "loss_bbox": 0.017, "loss_giou": 0.217, "loss_self_iou": 0.006, "cardinality_error": 7.835, "loss_ce_0": 0.267, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.235, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.835, "loss_caption_0": 1.804, "loss_caption": 1.811, "total_loss": 10.223}, "13566": {"loss_ce": 0.266, "loss_counter": 0.116, "loss_bbox": 0.015, "loss_giou": 0.204, "loss_self_iou": 0.005, "cardinality_error": 7.774, "loss_ce_0": 0.266, "loss_counter_0": 0.116, "loss_bbox_0": 0.017, "loss_giou_0": 0.221, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.774, "loss_caption_0": 1.884, "loss_caption": 1.887, "total_loss": 10.42}, "13699": {"loss_ce": 0.261, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.201, "loss_self_iou": 0.006, "cardinality_error": 7.729, "loss_ce_0": 0.259, "loss_counter_0": 0.11, "loss_bbox_0": 0.017, "loss_giou_0": 0.218, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.729, "loss_caption_0": 1.823, "loss_caption": 1.806, "total_loss": 10.083}, "13832": {"loss_ce": 0.269, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.211, "loss_self_iou": 0.005, "cardinality_error": 7.699, "loss_ce_0": 0.271, "loss_counter_0": 0.112, "loss_bbox_0": 0.017, "loss_giou_0": 0.228, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.699, "loss_caption_0": 1.855, "loss_caption": 1.857, "total_loss": 10.374}, "13965": {"loss_ce": 0.275, "loss_counter": 0.105, "loss_bbox": 0.016, "loss_giou": 0.196, "loss_self_iou": 0.006, "cardinality_error": 7.128, "loss_ce_0": 0.271, "loss_counter_0": 0.106, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.128, "loss_caption_0": 1.809, "loss_caption": 1.8, "total_loss": 10.055}, "14098": {"loss_ce": 0.273, "loss_counter": 0.112, "loss_bbox": 0.016, "loss_giou": 0.213, "loss_self_iou": 0.007, "cardinality_error": 7.925, "loss_ce_0": 0.273, "loss_counter_0": 0.113, "loss_bbox_0": 0.018, "loss_giou_0": 0.23, "loss_self_iou_0": 0.008, "cardinality_error_0": 
7.925, "loss_caption_0": 1.863, "loss_caption": 1.863, "total_loss": 10.433}, "14231": {"loss_ce": 0.261, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.212, "loss_self_iou": 0.007, "cardinality_error": 7.82, "loss_ce_0": 0.262, "loss_counter_0": 0.114, "loss_bbox_0": 0.018, "loss_giou_0": 0.222, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.82, "loss_caption_0": 1.936, "loss_caption": 1.929, "total_loss": 10.624}, "14364": {"loss_ce": 0.263, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.216, "loss_self_iou": 0.005, "cardinality_error": 7.744, "loss_ce_0": 0.263, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.227, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.744, "loss_caption_0": 1.757, "loss_caption": 1.754, "total_loss": 9.948}, "14497": {"loss_ce": 0.266, "loss_counter": 0.11, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.827, "loss_ce_0": 0.265, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.827, "loss_caption_0": 1.896, "loss_caption": 1.894, "total_loss": 10.407}, "14630": {"loss_ce": 0.263, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.005, "cardinality_error": 7.925, "loss_ce_0": 0.261, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.224, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.925, "loss_caption_0": 1.84, "loss_caption": 1.842, "total_loss": 10.253}, "14763": {"loss_ce": 0.266, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.006, "cardinality_error": 7.85, "loss_ce_0": 0.264, "loss_counter_0": 0.111, "loss_bbox_0": 0.016, "loss_giou_0": 0.225, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.87, "loss_caption": 1.877, "total_loss": 10.398}, "14896": {"loss_ce": 0.26, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.692, "loss_ce_0": 0.259, "loss_counter_0": 0.112, "loss_bbox_0": 0.016, "loss_giou_0": 0.217, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.692, "loss_caption_0": 1.796, "loss_caption": 1.784, "total_loss": 9.979}, "15029": {"loss_ce": 0.264, "loss_counter": 0.103, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.006, "cardinality_error": 7.414, "loss_ce_0": 0.264, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.414, "loss_caption_0": 1.763, "loss_caption": 1.767, "total_loss": 9.842}, "15162": {"loss_ce": 0.263, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.196, "loss_self_iou": 0.004, "cardinality_error": 7.767, "loss_ce_0": 0.262, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.767, "loss_caption_0": 1.781, "loss_caption": 1.781, "total_loss": 9.916}, "15295": {"loss_ce": 0.257, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.662, "loss_ce_0": 0.255, "loss_counter_0": 0.108, "loss_bbox_0": 0.016, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.662, "loss_caption_0": 1.735, "loss_caption": 1.75, "total_loss": 9.755}, "15428": {"loss_ce": 0.258, "loss_counter": 0.114, "loss_bbox": 0.015, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.992, "loss_ce_0": 0.261, "loss_counter_0": 0.114, "loss_bbox_0": 0.016, "loss_giou_0": 0.22, "loss_self_iou_0": 0.005, 
"cardinality_error_0": 7.992, "loss_caption_0": 1.852, "loss_caption": 1.86, "total_loss": 10.298}, "15561": {"loss_ce": 0.256, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.204, "loss_self_iou": 0.006, "cardinality_error": 8.068, "loss_ce_0": 0.257, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.218, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.068, "loss_caption_0": 1.878, "loss_caption": 1.866, "total_loss": 10.314}, "15694": {"loss_ce": 0.256, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.202, "loss_self_iou": 0.004, "cardinality_error": 7.647, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.016, "loss_giou_0": 0.215, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.647, "loss_caption_0": 1.7, "loss_caption": 1.684, "total_loss": 9.569}, "15827": {"loss_ce": 0.259, "loss_counter": 0.104, "loss_bbox": 0.016, "loss_giou": 0.194, "loss_self_iou": 0.005, "cardinality_error": 7.722, "loss_ce_0": 0.257, "loss_counter_0": 0.105, "loss_bbox_0": 0.017, "loss_giou_0": 0.209, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.722, "loss_caption_0": 1.848, "loss_caption": 1.839, "total_loss": 10.119}, "15960": {"loss_ce": 0.26, "loss_counter": 0.107, "loss_bbox": 0.015, "loss_giou": 0.197, "loss_self_iou": 0.004, "cardinality_error": 7.609, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.609, "loss_caption_0": 1.847, "loss_caption": 1.858, "total_loss": 10.198}, "16093": {"loss_ce": 0.257, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.258, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.773, "loss_caption": 1.769, "total_loss": 9.789}, "16226": {"loss_ce": 0.26, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.004, "cardinality_error": 7.805, "loss_ce_0": 0.259, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.215, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.805, "loss_caption_0": 1.743, "loss_caption": 1.749, "total_loss": 9.786}, "16359": {"loss_ce": 0.265, "loss_counter": 0.116, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.85, "loss_ce_0": 0.264, "loss_counter_0": 0.115, "loss_bbox_0": 0.015, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.797, "loss_caption": 1.778, "total_loss": 9.972}, "16492": {"loss_ce": 0.254, "loss_counter": 0.105, "loss_bbox": 0.015, "loss_giou": 0.189, "loss_self_iou": 0.004, "cardinality_error": 7.383, "loss_ce_0": 0.257, "loss_counter_0": 0.106, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.383, "loss_caption_0": 1.796, "loss_caption": 1.808, "total_loss": 9.899}, "16625": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.005, "cardinality_error": 7.782, "loss_ce_0": 0.256, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.203, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.782, "loss_caption_0": 1.78, "loss_caption": 1.779, "total_loss": 9.812}, "16758": {"loss_ce": 0.252, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.196, "loss_self_iou": 0.005, "cardinality_error": 7.962, "loss_ce_0": 0.252, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.211, 
"loss_self_iou_0": 0.005, "cardinality_error_0": 7.962, "loss_caption_0": 1.795, "loss_caption": 1.806, "total_loss": 9.948}, "16891": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.016, "loss_giou": 0.199, "loss_self_iou": 0.005, "cardinality_error": 7.797, "loss_ce_0": 0.255, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.797, "loss_caption_0": 1.788, "loss_caption": 1.782, "total_loss": 9.914}, "17024": {"loss_ce": 0.262, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.511, "loss_ce_0": 0.26, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.511, "loss_caption_0": 1.717, "loss_caption": 1.72, "total_loss": 9.666}, "17157": {"loss_ce": 0.25, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.189, "loss_self_iou": 0.004, "cardinality_error": 7.692, "loss_ce_0": 0.252, "loss_counter_0": 0.106, "loss_bbox_0": 0.016, "loss_giou_0": 0.2, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.738, "loss_caption": 1.749, "total_loss": 9.638}, "17290": {"loss_ce": 0.254, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.932, "loss_ce_0": 0.254, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.932, "loss_caption_0": 1.815, "loss_caption": 1.83, "total_loss": 10.067}, "17423": {"loss_ce": 0.262, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.007, "cardinality_error": 7.692, "loss_ce_0": 0.259, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.208, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.692, "loss_caption_0": 1.865, "loss_caption": 1.881, "total_loss": 10.261}, "17556": {"loss_ce": 0.252, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.193, "loss_self_iou": 0.004, "cardinality_error": 7.737, "loss_ce_0": 0.253, "loss_counter_0": 0.112, "loss_bbox_0": 0.015, "loss_giou_0": 0.209, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.737, "loss_caption_0": 1.744, "loss_caption": 1.743, "total_loss": 9.707}, "17689": {"loss_ce": 0.259, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.199, "loss_self_iou": 0.006, "cardinality_error": 7.602, "loss_ce_0": 0.262, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.602, "loss_caption_0": 1.835, "loss_caption": 1.819, "total_loss": 10.1}, "17822": {"loss_ce": 0.25, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.191, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.249, "loss_counter_0": 0.108, "loss_bbox_0": 0.016, "loss_giou_0": 0.206, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.681, "loss_caption": 1.67, "total_loss": 9.397}, "17955": {"loss_ce": 0.255, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.184, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.252, "loss_counter_0": 0.104, "loss_bbox_0": 0.015, "loss_giou_0": 0.2, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.757, "loss_caption": 1.745, "total_loss": 9.658}, "18088": {"loss_ce": 0.251, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.534, "loss_ce_0": 0.251, "loss_counter_0": 0.107, "loss_bbox_0": 0.016, 
"loss_giou_0": 0.191, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.534, "loss_caption_0": 1.703, "loss_caption": 1.701, "total_loss": 9.39}, "18221": {"loss_ce": 0.252, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.201, "loss_self_iou": 0.005, "cardinality_error": 8.211, "loss_ce_0": 0.252, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.213, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.211, "loss_caption_0": 1.824, "loss_caption": 1.816, "total_loss": 10.053}, "18354": {"loss_ce": 0.253, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.004, "cardinality_error": 7.789, "loss_ce_0": 0.249, "loss_counter_0": 0.105, "loss_bbox_0": 0.016, "loss_giou_0": 0.21, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.789, "loss_caption_0": 1.792, "loss_caption": 1.779, "total_loss": 9.874}, "18487": {"loss_ce": 0.255, "loss_counter": 0.111, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.251, "loss_counter_0": 0.112, "loss_bbox_0": 0.014, "loss_giou_0": 0.205, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.826, "loss_caption": 1.81, "total_loss": 9.979}, "18620": {"loss_ce": 0.251, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.193, "loss_self_iou": 0.003, "cardinality_error": 7.737, "loss_ce_0": 0.251, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.206, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.737, "loss_caption_0": 1.767, "loss_caption": 1.771, "total_loss": 9.784}, "18753": {"loss_ce": 0.247, "loss_counter": 0.115, "loss_bbox": 0.013, "loss_giou": 0.195, "loss_self_iou": 0.004, "cardinality_error": 8.241, "loss_ce_0": 0.251, "loss_counter_0": 0.115, "loss_bbox_0": 0.014, "loss_giou_0": 0.207, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.241, "loss_caption_0": 1.758, "loss_caption": 1.759, "total_loss": 9.756}, "18886": {"loss_ce": 0.247, "loss_counter": 0.103, "loss_bbox": 0.015, "loss_giou": 0.182, "loss_self_iou": 0.004, "cardinality_error": 7.436, "loss_ce_0": 0.245, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.194, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.436, "loss_caption_0": 1.696, "loss_caption": 1.692, "total_loss": 9.366}, "19019": {"loss_ce": 0.243, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.003, "cardinality_error": 7.692, "loss_ce_0": 0.242, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.73, "loss_caption": 1.729, "total_loss": 9.496}, "19152": {"loss_ce": 0.251, "loss_counter": 0.112, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.251, "loss_counter_0": 0.114, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 1.779, "loss_caption": 1.771, "total_loss": 9.714}, "19285": {"loss_ce": 0.25, "loss_counter": 0.105, "loss_bbox": 0.014, "loss_giou": 0.194, "loss_self_iou": 0.004, "cardinality_error": 7.669, "loss_ce_0": 0.25, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.204, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.669, "loss_caption_0": 1.76, "loss_caption": 1.772, "total_loss": 9.759}, "19418": {"loss_ce": 0.244, "loss_counter": 0.115, "loss_bbox": 0.013, "loss_giou": 0.197, "loss_self_iou": 0.004, "cardinality_error": 8.256, "loss_ce_0": 0.245, "loss_counter_0": 0.117, 
"loss_bbox_0": 0.014, "loss_giou_0": 0.211, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.256, "loss_caption_0": 1.754, "loss_caption": 1.758, "total_loss": 9.747}, "19551": {"loss_ce": 0.249, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.865, "loss_ce_0": 0.253, "loss_counter_0": 0.11, "loss_bbox_0": 0.014, "loss_giou_0": 0.187, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.865, "loss_caption_0": 1.68, "loss_caption": 1.689, "total_loss": 9.3}, "19684": {"loss_ce": 0.263, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.187, "loss_self_iou": 0.005, "cardinality_error": 7.474, "loss_ce_0": 0.262, "loss_counter_0": 0.105, "loss_bbox_0": 0.016, "loss_giou_0": 0.199, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.474, "loss_caption_0": 1.81, "loss_caption": 1.803, "total_loss": 9.923}, "19817": {"loss_ce": 0.246, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.183, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.247, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.769, "loss_caption": 1.765, "total_loss": 9.677}, "19950": {"loss_ce": 0.254, "loss_counter": 0.108, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 7.797, "loss_ce_0": 0.253, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.797, "loss_caption_0": 1.736, "loss_caption": 1.748, "total_loss": 9.654}, "20083": {"loss_ce": 0.254, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.519, "loss_ce_0": 0.257, "loss_counter_0": 0.103, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.519, "loss_caption_0": 1.743, "loss_caption": 1.756, "total_loss": 9.655}, "20216": {"loss_ce": 0.244, "loss_counter": 0.105, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.003, "cardinality_error": 7.759, "loss_ce_0": 0.244, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.759, "loss_caption_0": 1.79, "loss_caption": 1.781, "total_loss": 9.713}, "20349": {"loss_ce": 0.246, "loss_counter": 0.11, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.245, "loss_counter_0": 0.113, "loss_bbox_0": 0.014, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.992, "loss_caption_0": 1.749, "loss_caption": 1.759, "total_loss": 9.675}, "20482": {"loss_ce": 0.244, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.193, "loss_self_iou": 0.005, "cardinality_error": 7.94, "loss_ce_0": 0.244, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.207, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.94, "loss_caption_0": 1.694, "loss_caption": 1.715, "total_loss": 9.502}, "20615": {"loss_ce": 0.257, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.188, "loss_self_iou": 0.005, "cardinality_error": 7.368, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.368, "loss_caption_0": 1.77, "loss_caption": 1.771, "total_loss": 9.775}, "20748": {"loss_ce": 0.247, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.004, "cardinality_error": 7.857, "loss_ce_0": 0.247, 
"loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.857, "loss_caption_0": 1.786, "loss_caption": 1.773, "total_loss": 9.695}, "20881": {"loss_ce": 0.243, "loss_counter": 0.103, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.003, "cardinality_error": 7.594, "loss_ce_0": 0.242, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.594, "loss_caption_0": 1.746, "loss_caption": 1.748, "total_loss": 9.541}, "21014": {"loss_ce": 0.249, "loss_counter": 0.108, "loss_bbox": 0.015, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 8.09, "loss_ce_0": 0.249, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.09, "loss_caption_0": 1.709, "loss_caption": 1.698, "total_loss": 9.49}, "21147": {"loss_ce": 0.246, "loss_counter": 0.115, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.812, "loss_ce_0": 0.248, "loss_counter_0": 0.114, "loss_bbox_0": 0.015, "loss_giou_0": 0.198, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.812, "loss_caption_0": 1.733, "loss_caption": 1.732, "total_loss": 9.57}, "21280": {"loss_ce": 0.246, "loss_counter": 0.104, "loss_bbox": 0.014, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 7.632, "loss_ce_0": 0.245, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.632, "loss_caption_0": 1.646, "loss_caption": 1.658, "total_loss": 9.233}, "21413": {"loss_ce": 0.24, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.541, "loss_ce_0": 0.239, "loss_counter_0": 0.108, "loss_bbox_0": 0.015, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.541, "loss_caption_0": 1.637, "loss_caption": 1.633, "total_loss": 9.069}, "21546": {"loss_ce": 0.245, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.172, "loss_self_iou": 0.004, "cardinality_error": 7.624, "loss_ce_0": 0.243, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.185, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.624, "loss_caption_0": 1.773, "loss_caption": 1.784, "total_loss": 9.621}, "21679": {"loss_ce": 0.239, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.238, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.809, "loss_caption": 1.805, "total_loss": 9.791}, "21812": {"loss_ce": 0.246, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.003, "cardinality_error": 7.677, "loss_ce_0": 0.25, "loss_counter_0": 0.108, "loss_bbox_0": 0.013, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.677, "loss_caption_0": 1.674, "loss_caption": 1.676, "total_loss": 9.277}, "21945": {"loss_ce": 0.244, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.192, "loss_self_iou": 0.004, "cardinality_error": 7.865, "loss_ce_0": 0.244, "loss_counter_0": 0.108, "loss_bbox_0": 0.015, "loss_giou_0": 0.206, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.865, "loss_caption_0": 1.713, "loss_caption": 1.714, "total_loss": 9.531}, "22078": {"loss_ce": 0.251, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 7.707, 
"loss_ce_0": 0.247, "loss_counter_0": 0.111, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.707, "loss_caption_0": 1.772, "loss_caption": 1.758, "total_loss": 9.738}, "22211": {"loss_ce": 0.249, "loss_counter": 0.101, "loss_bbox": 0.013, "loss_giou": 0.18, "loss_self_iou": 0.005, "cardinality_error": 7.541, "loss_ce_0": 0.249, "loss_counter_0": 0.101, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.541, "loss_caption_0": 1.665, "loss_caption": 1.66, "total_loss": 9.243}, "22344": {"loss_ce": 0.246, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 8.008, "loss_ce_0": 0.248, "loss_counter_0": 0.115, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.008, "loss_caption_0": 1.799, "loss_caption": 1.784, "total_loss": 9.823}, "22477": {"loss_ce": 0.246, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.184, "loss_self_iou": 0.004, "cardinality_error": 7.699, "loss_ce_0": 0.247, "loss_counter_0": 0.104, "loss_bbox_0": 0.014, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.699, "loss_caption_0": 1.722, "loss_caption": 1.733, "total_loss": 9.525}, "22610": {"loss_ce": 0.243, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.188, "loss_self_iou": 0.004, "cardinality_error": 7.729, "loss_ce_0": 0.245, "loss_counter_0": 0.106, "loss_bbox_0": 0.015, "loss_giou_0": 0.2, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.729, "loss_caption_0": 1.664, "loss_caption": 1.667, "total_loss": 9.297}, "22743": {"loss_ce": 0.244, "loss_counter": 0.108, "loss_bbox": 0.015, "loss_giou": 0.196, "loss_self_iou": 0.005, "cardinality_error": 7.714, "loss_ce_0": 0.244, "loss_counter_0": 0.109, "loss_bbox_0": 0.017, "loss_giou_0": 0.21, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.714, "loss_caption_0": 1.773, "loss_caption": 1.775, "total_loss": 9.803}, "22876": {"loss_ce": 0.245, "loss_counter": 0.11, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.774, "loss_ce_0": 0.249, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.774, "loss_caption_0": 1.76, "loss_caption": 1.759, "total_loss": 9.631}, "23009": {"loss_ce": 0.237, "loss_counter": 0.105, "loss_bbox": 0.012, "loss_giou": 0.171, "loss_self_iou": 0.003, "cardinality_error": 7.872, "loss_ce_0": 0.237, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.183, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.872, "loss_caption_0": 1.69, "loss_caption": 1.688, "total_loss": 9.229}, "23142": {"loss_ce": 0.242, "loss_counter": 0.098, "loss_bbox": 0.013, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.744, "loss_ce_0": 0.239, "loss_counter_0": 0.1, "loss_bbox_0": 0.014, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.744, "loss_caption_0": 1.66, "loss_caption": 1.663, "total_loss": 9.173}, "23275": {"loss_ce": 0.242, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.183, "loss_self_iou": 0.004, "cardinality_error": 7.82, "loss_ce_0": 0.242, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.82, "loss_caption_0": 1.727, "loss_caption": 1.741, "total_loss": 9.535}, "23408": {"loss_ce": 0.235, "loss_counter": 0.104, "loss_bbox": 0.014, "loss_giou": 0.173, "loss_self_iou": 0.004, 
"cardinality_error": 7.083, "loss_ce_0": 0.235, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.182, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.083, "loss_caption_0": 1.678, "loss_caption": 1.68, "total_loss": 9.181}, "23541": {"loss_ce": 0.25, "loss_counter": 0.112, "loss_bbox": 0.013, "loss_giou": 0.185, "loss_self_iou": 0.003, "cardinality_error": 7.782, "loss_ce_0": 0.253, "loss_counter_0": 0.111, "loss_bbox_0": 0.014, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.782, "loss_caption_0": 1.686, "loss_caption": 1.674, "total_loss": 9.361}, "23674": {"loss_ce": 0.242, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.699, "loss_ce_0": 0.242, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.188, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.699, "loss_caption_0": 1.734, "loss_caption": 1.755, "total_loss": 9.502}, "23807": {"loss_ce": 0.247, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.188, "loss_self_iou": 0.004, "cardinality_error": 8.023, "loss_ce_0": 0.248, "loss_counter_0": 0.111, "loss_bbox_0": 0.014, "loss_giou_0": 0.199, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.023, "loss_caption_0": 1.838, "loss_caption": 1.842, "total_loss": 10.01}, "23940": {"loss_ce": 0.242, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.004, "cardinality_error": 7.789, "loss_ce_0": 0.246, "loss_counter_0": 0.11, "loss_bbox_0": 0.014, "loss_giou_0": 0.189, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.789, "loss_caption_0": 1.661, "loss_caption": 1.655, "total_loss": 9.188}, "24073": {"loss_ce": 0.244, "loss_counter": 0.11, "loss_bbox": 0.012, "loss_giou": 0.178, "loss_self_iou": 0.003, "cardinality_error": 7.97, "loss_ce_0": 0.246, "loss_counter_0": 0.112, "loss_bbox_0": 0.013, "loss_giou_0": 0.191, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.97, "loss_caption_0": 1.689, "loss_caption": 1.683, "total_loss": 9.309}, "24206": {"loss_ce": 0.237, "loss_counter": 0.118, "loss_bbox": 0.013, "loss_giou": 0.183, "loss_self_iou": 0.005, "cardinality_error": 8.286, "loss_ce_0": 0.236, "loss_counter_0": 0.118, "loss_bbox_0": 0.013, "loss_giou_0": 0.195, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.286, "loss_caption_0": 1.712, "loss_caption": 1.715, "total_loss": 9.432}, "24339": {"loss_ce": 0.245, "loss_counter": 0.098, "loss_bbox": 0.012, "loss_giou": 0.167, "loss_self_iou": 0.003, "cardinality_error": 7.316, "loss_ce_0": 0.247, "loss_counter_0": 0.099, "loss_bbox_0": 0.013, "loss_giou_0": 0.179, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.316, "loss_caption_0": 1.695, "loss_caption": 1.701, "total_loss": 9.257}, "24472": {"loss_ce": 0.243, "loss_counter": 0.108, "loss_bbox": 0.013, "loss_giou": 0.176, "loss_self_iou": 0.003, "cardinality_error": 7.459, "loss_ce_0": 0.248, "loss_counter_0": 0.109, "loss_bbox_0": 0.014, "loss_giou_0": 0.187, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.459, "loss_caption_0": 1.699, "loss_caption": 1.699, "total_loss": 9.337}, "24605": {"loss_ce": 0.242, "loss_counter": 0.103, "loss_bbox": 0.014, "loss_giou": 0.18, "loss_self_iou": 0.004, "cardinality_error": 7.812, "loss_ce_0": 0.243, "loss_counter_0": 0.104, "loss_bbox_0": 0.015, "loss_giou_0": 0.189, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.812, "loss_caption_0": 1.775, "loss_caption": 1.773, "total_loss": 9.644}, "24738": {"loss_ce": 0.243, "loss_counter": 0.101, "loss_bbox": 0.016, "loss_giou": 0.187, 
"loss_self_iou": 0.004, "cardinality_error": 7.556, "loss_ce_0": 0.246, "loss_counter_0": 0.103, "loss_bbox_0": 0.016, "loss_giou_0": 0.196, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.556, "loss_caption_0": 1.727, "loss_caption": 1.73, "total_loss": 9.525}, "24871": {"loss_ce": 0.239, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.692, "loss_ce_0": 0.241, "loss_counter_0": 0.105, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.77, "loss_caption": 1.773, "total_loss": 9.641}, "25004": {"loss_ce": 0.246, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 8.143, "loss_ce_0": 0.247, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.143, "loss_caption_0": 1.692, "loss_caption": 1.684, "total_loss": 9.379}, "25137": {"loss_ce": 0.245, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.179, "loss_self_iou": 0.004, "cardinality_error": 7.88, "loss_ce_0": 0.245, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.88, "loss_caption_0": 1.691, "loss_caption": 1.696, "total_loss": 9.347}, "25270": {"loss_ce": 0.237, "loss_counter": 0.103, "loss_bbox": 0.014, "loss_giou": 0.185, "loss_self_iou": 0.004, "cardinality_error": 7.767, "loss_ce_0": 0.238, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.767, "loss_caption_0": 1.687, "loss_caption": 1.694, "total_loss": 9.34}, "25403": {"loss_ce": 0.247, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.176, "loss_self_iou": 0.005, "cardinality_error": 7.429, "loss_ce_0": 0.248, "loss_counter_0": 0.105, "loss_bbox_0": 0.014, "loss_giou_0": 0.186, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.429, "loss_caption_0": 1.705, "loss_caption": 1.695, "total_loss": 9.343}, "25536": {"loss_ce": 0.241, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.189, "loss_self_iou": 0.003, "cardinality_error": 7.887, "loss_ce_0": 0.246, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.887, "loss_caption_0": 1.717, "loss_caption": 1.729, "total_loss": 9.517}, "25669": {"loss_ce": 0.239, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.707, "loss_ce_0": 0.243, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.186, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.707, "loss_caption_0": 1.718, "loss_caption": 1.711, "total_loss": 9.385}, "25802": {"loss_ce": 0.24, "loss_counter": 0.111, "loss_bbox": 0.013, "loss_giou": 0.183, "loss_self_iou": 0.004, "cardinality_error": 8.173, "loss_ce_0": 0.242, "loss_counter_0": 0.113, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.173, "loss_caption_0": 1.732, "loss_caption": 1.735, "total_loss": 9.515}, "25935": {"loss_ce": 0.241, "loss_counter": 0.105, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.005, "cardinality_error": 7.82, "loss_ce_0": 0.241, "loss_counter_0": 0.107, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.82, "loss_caption_0": 1.626, "loss_caption": 1.628, "total_loss": 9.063}, "26068": {"loss_ce": 0.24, "loss_counter": 0.102, "loss_bbox": 0.014, 
"loss_giou": 0.182, "loss_self_iou": 0.005, "cardinality_error": 7.444, "loss_ce_0": 0.243, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.19, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.444, "loss_caption_0": 1.697, "loss_caption": 1.701, "total_loss": 9.35}, "26201": {"loss_ce": 0.239, "loss_counter": 0.097, "loss_bbox": 0.014, "loss_giou": 0.168, "loss_self_iou": 0.005, "cardinality_error": 7.301, "loss_ce_0": 0.237, "loss_counter_0": 0.099, "loss_bbox_0": 0.015, "loss_giou_0": 0.181, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.301, "loss_caption_0": 1.702, "loss_caption": 1.703, "total_loss": 9.254}, "26334": {"loss_ce": 0.238, "loss_counter": 0.112, "loss_bbox": 0.013, "loss_giou": 0.174, "loss_self_iou": 0.003, "cardinality_error": 7.827, "loss_ce_0": 0.242, "loss_counter_0": 0.112, "loss_bbox_0": 0.014, "loss_giou_0": 0.188, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.827, "loss_caption_0": 1.729, "loss_caption": 1.725, "total_loss": 9.424}, "26467": {"loss_ce": 0.247, "loss_counter": 0.109, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.003, "cardinality_error": 8.023, "loss_ce_0": 0.245, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.195, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.023, "loss_caption_0": 1.751, "loss_caption": 1.746, "total_loss": 9.586}, "26600": {"loss_ce": 0.242, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.902, "loss_ce_0": 0.242, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.902, "loss_caption_0": 1.727, "loss_caption": 1.737, "total_loss": 9.533}}, "lr_history": {"133": 5e-05, "266": 5e-05, "399": 5e-05, "532": 5e-05, "665": 5e-05, "798": 5e-05, "931": 5e-05, "1064": 5e-05, "1197": 5e-05, "1330": 5e-05, "1463": 5e-05, "1596": 5e-05, "1729": 5e-05, "1862": 5e-05, "1995": 5e-05, "2128": 5e-05, "2261": 5e-05, "2394": 5e-05, "2527": 5e-05, "2660": 5e-05, "2793": 5e-05, "2926": 5e-05, "3059": 5e-05, "3192": 5e-05, "3325": 5e-05, "3458": 5e-05, "3591": 5e-05, "3724": 5e-05, "3857": 5e-05, "3990": 5e-05, "4123": 5e-05, "4256": 5e-05, "4389": 5e-05, "4522": 5e-05, "4655": 5e-05, "4788": 5e-05, "4921": 5e-05, "5054": 5e-05, "5187": 5e-05, "5320": 5e-05, "5453": 5e-05, "5586": 5e-05, "5719": 5e-05, "5852": 5e-05, "5985": 5e-05, "6118": 5e-05, "6251": 5e-05, "6384": 5e-05, "6517": 5e-05, "6650": 5e-05, "6783": 5e-05, "6916": 5e-05, "7049": 5e-05, "7182": 5e-05, "7315": 5e-05, "7448": 5e-05, "7581": 5e-05, "7714": 5e-05, "7847": 5e-05, "7980": 5e-05, "8113": 5e-05, "8246": 5e-05, "8379": 5e-05, "8512": 5e-05, "8645": 5e-05, "8778": 5e-05, "8911": 5e-05, "9044": 5e-05, "9177": 5e-05, "9310": 5e-05, "9443": 5e-05, "9576": 5e-05, "9709": 5e-05, "9842": 5e-05, "9975": 5e-05, "10108": 5e-05, "10241": 5e-05, "10374": 5e-05, "10507": 5e-05, "10640": 5e-05, "10773": 2.5e-05, "10906": 2.5e-05, "11039": 2.5e-05, "11172": 2.5e-05, "11305": 2.5e-05, "11438": 2.5e-05, "11571": 2.5e-05, "11704": 2.5e-05, "11837": 2.5e-05, "11970": 2.5e-05, "12103": 2.5e-05, "12236": 2.5e-05, "12369": 2.5e-05, "12502": 2.5e-05, "12635": 2.5e-05, "12768": 2.5e-05, "12901": 2.5e-05, "13034": 2.5e-05, "13167": 2.5e-05, "13300": 2.5e-05, "13433": 2.5e-05, "13566": 2.5e-05, "13699": 2.5e-05, "13832": 2.5e-05, "13965": 2.5e-05, "14098": 2.5e-05, "14231": 2.5e-05, "14364": 2.5e-05, "14497": 2.5e-05, "14630": 2.5e-05, "14763": 1.25e-05, "14896": 1.25e-05, "15029": 1.25e-05, "15162": 1.25e-05, "15295": 
1.25e-05, "15428": 1.25e-05, "15561": 1.25e-05, "15694": 1.25e-05, "15827": 1.25e-05, "15960": 1.25e-05, "16093": 1.25e-05, "16226": 1.25e-05, "16359": 1.25e-05, "16492": 1.25e-05, "16625": 1.25e-05, "16758": 1.25e-05, "16891": 1.25e-05, "17024": 1.25e-05, "17157": 1.25e-05, "17290": 1.25e-05, "17423": 1.25e-05, "17556": 1.25e-05, "17689": 1.25e-05, "17822": 1.25e-05, "17955": 1.25e-05, "18088": 1.25e-05, "18221": 1.25e-05, "18354": 1.25e-05, "18487": 1.25e-05, "18620": 1.25e-05, "18753": 6.25e-06, "18886": 6.25e-06, "19019": 6.25e-06, "19152": 6.25e-06, "19285": 6.25e-06, "19418": 6.25e-06, "19551": 6.25e-06, "19684": 6.25e-06, "19817": 6.25e-06, "19950": 6.25e-06, "20083": 6.25e-06, "20216": 6.25e-06, "20349": 6.25e-06, "20482": 6.25e-06, "20615": 6.25e-06, "20748": 6.25e-06, "20881": 6.25e-06, "21014": 6.25e-06, "21147": 6.25e-06, "21280": 6.25e-06, "21413": 6.25e-06, "21546": 6.25e-06, "21679": 6.25e-06, "21812": 6.25e-06, "21945": 6.25e-06, "22078": 6.25e-06, "22211": 6.25e-06, "22344": 6.25e-06, "22477": 6.25e-06, "22610": 6.25e-06, "22743": 3.125e-06, "22876": 3.125e-06, "23009": 3.125e-06, "23142": 3.125e-06, "23275": 3.125e-06, "23408": 3.125e-06, "23541": 3.125e-06, "23674": 3.125e-06, "23807": 3.125e-06, "23940": 3.125e-06, "24073": 3.125e-06, "24206": 3.125e-06, "24339": 3.125e-06, "24472": 3.125e-06, "24605": 3.125e-06, "24738": 3.125e-06, "24871": 3.125e-06, "25004": 3.125e-06, "25137": 3.125e-06, "25270": 3.125e-06, "25403": 3.125e-06, "25536": 3.125e-06, "25669": 3.125e-06, "25802": 3.125e-06, "25935": 3.125e-06, "26068": 3.125e-06, "26201": 3.125e-06, "26334": 3.125e-06, "26467": 3.125e-06, "26600": 3.125e-06}}, "eval_history": {}} \ No newline at end of file diff --git a/yc2_univl/model-best.pth b/yc2_univl/model-best.pth new file mode 100644 index 0000000000000000000000000000000000000000..42c65282e76faa81b01540eeb0c653178e2e7f49 --- /dev/null +++ b/yc2_univl/model-best.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5649639a5e2d91c0e6430f7ef3a969419b6a920dabf8cac4617479f64d1d76 +size 377084545 diff --git a/yc2_univl/model-last.pth b/yc2_univl/model-last.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd48389557fb5978c075e01fe7bcfa5562d45dd2 --- /dev/null +++ b/yc2_univl/model-last.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417eb7f05c35061d38ba56c6fab6b264b7e8c9c6d02cbf125c5586c1087696be +size 377084545 diff --git a/yc2_univl/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 b/yc2_univl/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 new file mode 100644 index 0000000000000000000000000000000000000000..d19b25d3fe05018fbced327351295a36da6e2a4d --- /dev/null +++ b/yc2_univl/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cdd59bc89bebf7407e106fa3a4f04453f379c084fc388a64117a235f79c9746 +size 179068 diff --git a/yc2_univl/train.log b/yc2_univl/train.log new file mode 100644 index 0000000000000000000000000000000000000000..bb2d6d2ae6890d5bf94f03e99b8267cbac0b8177 --- /dev/null +++ b/yc2_univl/train.log @@ -0,0 +1,1516 @@ +backup environment completed !
+Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal + + +******************** All args: ************************************************* +align_contiguous = False +align_drop_z = 0 +align_keep_percentile = 0.1 +align_many_to_one = False +align_one_to_many = False +align_top_band_size = 0 +att_hid_size = 512 +aux_loss = True +backbone = None +base_cfg_path = cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml +basic_ss_prob = 0 +batch_size = 1 +batch_size_for_eval = 1 +bbox_loss_coef = 0 +beta = 1 +cap_dec_n_points = 4 +cap_nheads = 1 +cap_num_feature_levels = 4 +cap_prob_clip = False +caption_cost_type = loss +caption_decoder_type = standard +caption_loss_coef = 2 +cfg_path = cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +cl_schedule_time = [0, 2] +cl_schedule_val = [0, 0.1] +clip_context_dim = 512 +cls_loss_coef = 2 +contrastive_hidden_size = 128 +contrastive_loss_start_coef = 0.0 +contrastive_loss_temperature = 0.1 +cost_alpha = 0.25 +cost_gamma = 2 +count_loss_coef = 0.5 +criteria_for_best_ckpt = overall +current_lr = 5e-05 +data_norm = 0 +data_rescale = 1 +debug = False +dec_layers = 2 +dec_n_points = 4 +device = cuda +dict_file = data/howto/vocabulary_howto_rate2_yc2.json +dict_file_val = data/howto/vocabulary_howto_rate2_yc2.json +dilation = False +disable_contrastive_projection = 1 +disable_cudnn = 0 +disable_mid_caption_heads = False +disable_rematch = False +disable_tqdm = False +drop_prob = 0.5 +ec_alpha = 1.0 +enable_bg_for_cl = True +enable_contrastive = False +enable_cross_video_cl = True +enable_e2t_cl = True +enc_layers = 2 +enc_n_points = 4 +eos_coef = 0.1 +epoch = 20 +event_context_dim = None +feature_dim = 768 +feature_sample_rate = 1 +fix_xcw = 1 +focal_alpha = 0.25 +focal_gamma = 2.0 +focal_mil = False +frame_embedding_num = 200 +ft_gt_percent = 1.0 +giou_loss_coef = 4 +gpu_id = [] +grad_clip = 100.0 +gt_file_for_auc = data/anet/captiondata/val_all.json +gt_file_for_eval = ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval = ['data/yc2/captiondata/para/para_yc2_val.json'] +gt_proposal_sample_num = 20 +hidden_dim = 512 +hidden_dropout_prob = 0.5 +huggingface_cache_dir = .cache +id = seq2-ft(mix)-gt_percent-1.0 +id_ori = +input_encoding_size = 512 +invalid_video_json = [] +iteration = 3 +layer_norm_eps = 1e-12 +learning_rate_decay_every = 3 +learning_rate_decay_rate = 0.5 +learning_rate_decay_start = 8 +lloss_beta = 1 +lloss_cross_entropy = 0 +lloss_focal_loss = 0 +lloss_gau_mask = 1 +lr = 5e-05 +lr_backbone = 2e-05 +lr_backbone_names = ['None'] +lr_linear_proj_mult = 0.1 +lr_linear_proj_names = ['reference_points', 'sampling_offsets'] +lr_proj = 0 +map = True +matcher_type = default +max_caption_len = 50 +max_eseq_length = 20 +max_pos_num = 500 +max_text_input_len = 32 +merge_criterion = ins_cap_topk +merge_k_boxes = 3 +merge_mode = weighted_sum +mil_loss_coef = 0 +min_epoch_when_save = -1 +nheads = 8 +norm_ins_score = sigmoid +nthreads = 4 +num_classes = 1 +num_feature_levels = 4 +num_layers = 1 +num_neg_box = 10 +num_queries = 100 +optimizer_type = adam +position_embedding = sine +position_embedding_scale = 6.283185307179586 +pre_percent = 1.0 +pretrain = None +pretrain_path = +pretrained_language_model = UniVL +prior_anchor_duration_init = True +prior_manner = all +pseudo_box_aug = False +pseudo_box_aug_mode = random_range 
+pseudo_box_aug_num = 8 +pseudo_box_aug_ratio = 0.02 +pseudo_box_type = similarity_op_order_v2 +random_anchor_init = True +random_seed = False +ref_rank_loss_coef = 0.0 +refine_pseudo_box = False +refine_pseudo_stage_num = 2 +rnn_size = 512 +sample_method = nearest +save_all_checkpoint = 0 +save_checkpoint_every = 1 +save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs +scheduled_sampling_increase_every = 2 +scheduled_sampling_increase_prob = 0.05 +scheduled_sampling_max_prob = 0.25 +scheduled_sampling_start = -1 +seed = 777 +self_iou_loss_coef = 0.0 +set_cost_bbox = 0 +set_cost_caption = 0 +set_cost_cl = 0.0 +set_cost_class = 2 +set_cost_giou = 4 +set_cost_sim = 1.0 +share_caption_head = 1 +soft_attention = 1 +start_from = +start_from_mode = last +start_refine_epoch = -1 +statistic_mode = mode +test = False +text_encoder_learning_strategy = frozen +text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] +text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] +text_hidden_dim = 768 +top_frames = 25 +train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] +train_proposal_sample_num = 30 +train_proposal_type = gt +training_scheme = all +transformer_dropout_prob = 0.1 +transformer_ff_dim = 512 +transformer_input_type = queries +use_additional_cap_layer = False +use_additional_score_layer = False +use_anchor = 0 +use_neg_pseudo_box = False +use_pseudo_box = False +use_query_box_for_refine = 0 +val_caption_file = data/yc2/captiondata/yc2_val.json +visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] +visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] +visual_feature_type = ['UniVL'] +vocab_size = 14538 +vocab_size_val = 14538 +weight_decay = 0.0001 +weighted_mil_loss = False +width_ratio = 1 +width_th = 1 +window_size = 3 +with_box_refine = 1 +wordRNN_input_feats_type = C + + +******************** Model structure: ****************************************** +PDVC( + (base_encoder): BaseEncoder( + (pos_embed): PositionEmbeddingSine( + (duration_embed_layer): Linear(in_features=256, out_features=256, bias=True) + ) + (input_proj): ModuleList( + (0): Sequential( + (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (1): Sequential( + (0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (2): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (3): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + ) + ) + (transformer): DeformableTransformer( + (encoder): DeformableTransformerEncoder( + (layers): ModuleList( + (0): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): 
Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (decoder): DeformableTransformerDecoder( + (layers): ModuleList( + (0): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + (bbox_head): ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): 
Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + ) + (pos_trans): Linear(in_features=512, out_features=1024, bias=True) + (pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (reference_points): Linear(in_features=512, out_features=1, bias=True) + ) + (caption_head): ModuleList( + (0): LSTMDSACaptioner( + (embed): Embedding(14539, 512) + (logit): Linear(in_features=512, out_features=14539, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + (1): LSTMDSACaptioner( + (embed): Embedding(14539, 512) + (logit): Linear(in_features=512, out_features=14539, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + ) + (query_embed): Embedding(100, 1024) + (class_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (class_refine_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (count_head): ModuleList( + (0): Linear(in_features=512, out_features=21, bias=True) + (1): Linear(in_features=512, out_features=21, bias=True) + ) + (bbox_head): ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + (contrastive_projection_event): ModuleList( + (0): Identity() + (1): Identity() + ) + (contrastive_projection_text): ModuleList( + (0): Identity() + (1): Identity() + ) +) + + +******************** Start training !
****************************************** +loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) +loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) +ID seq2-ft(mix)-gt_percent-1.0 iter 133 (epoch 0), +loss = OrderedDict([('loss_ce', 0.336), ('loss_counter', 0.129), ('loss_bbox', 0.039), ('loss_giou', 0.368), ('loss_self_iou', 0.028), ('cardinality_error', 7.797), ('loss_ce_0', 0.337), ('loss_counter_0', 0.13), ('loss_bbox_0', 0.041), ('loss_giou_0', 0.381), ('loss_self_iou_0', 0.03), ('cardinality_error_0', 7.797), ('loss_caption_0', 2.755), ('loss_caption', 2.681), ('total_loss', 15.341)]), +time/iter = 0.172, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 266 (epoch 0), +loss = OrderedDict([('loss_ce', 0.324), ('loss_counter', 0.129), ('loss_bbox', 0.036), ('loss_giou', 0.369), ('loss_self_iou', 0.018), ('cardinality_error', 7.812), ('loss_ce_0', 0.341), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.039), ('loss_giou_0', 0.38), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.812), ('loss_caption_0', 2.803), ('loss_caption', 2.638), ('total_loss', 15.341)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 399 (epoch 0), +loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.13), ('loss_bbox', 0.039), ('loss_giou', 0.375), ('loss_self_iou', 0.02), ('cardinality_error', 7.835), ('loss_ce_0', 0.324), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.043), ('loss_giou_0', 0.395), ('loss_self_iou_0', 0.021), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.81), ('loss_caption', 2.676), ('total_loss', 15.459)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 532 (epoch 0), +loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.133), ('loss_bbox', 0.044), ('loss_giou', 0.394), ('loss_self_iou', 0.02), ('cardinality_error', 7.902), ('loss_ce_0', 0.319), ('loss_counter_0', 0.133), ('loss_bbox_0', 0.05), ('loss_giou_0', 0.421), ('loss_self_iou_0', 0.026), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.817), ('loss_caption', 2.654), ('total_loss', 15.588)]), +time/iter = 0.167, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 665 (epoch 0), +loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.135), ('loss_bbox', 0.034), ('loss_giou', 0.345), ('loss_self_iou', 0.017), ('cardinality_error', 7.805), ('loss_ce_0', 0.319), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.038), ('loss_giou_0', 0.372), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.805), ('loss_caption_0', 2.758), ('loss_caption', 2.635), ('total_loss', 15.049)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 798 (epoch 0), +loss = OrderedDict([('loss_ce', 0.321), ('loss_counter', 0.125), ('loss_bbox', 0.03), ('loss_giou', 0.319), ('loss_self_iou', 0.015), ('cardinality_error', 7.774), ('loss_ce_0', 0.331), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.032), ('loss_giou_0', 0.344), ('loss_self_iou_0', 0.015), ('cardinality_error_0', 7.774), ('loss_caption_0', 2.66), ('loss_caption', 2.559), ('total_loss', 14.519)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 931 (epoch 0), +loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.122), ('loss_bbox', 0.027), ('loss_giou', 0.306), ('loss_self_iou', 0.011), ('cardinality_error', 7.865), ('loss_ce_0', 0.346), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.029), 
('loss_giou_0', 0.327), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.54), ('loss_caption', 2.468), ('total_loss', 14.017)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1064 (epoch 0), +loss = OrderedDict([('loss_ce', 0.331), ('loss_counter', 0.121), ('loss_bbox', 0.027), ('loss_giou', 0.292), ('loss_self_iou', 0.01), ('cardinality_error', 7.579), ('loss_ce_0', 0.345), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.311), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.579), ('loss_caption_0', 2.639), ('loss_caption', 2.626), ('total_loss', 14.419)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1197 (epoch 0), +loss = OrderedDict([('loss_ce', 0.325), ('loss_counter', 0.118), ('loss_bbox', 0.026), ('loss_giou', 0.296), ('loss_self_iou', 0.011), ('cardinality_error', 7.241), ('loss_ce_0', 0.339), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.317), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.241), ('loss_caption_0', 2.501), ('loss_caption', 2.496), ('total_loss', 13.892)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1330 (epoch 0), +loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.126), ('loss_bbox', 0.026), ('loss_giou', 0.304), ('loss_self_iou', 0.011), ('cardinality_error', 7.94), ('loss_ce_0', 0.334), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.029), ('loss_giou_0', 0.332), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.94), ('loss_caption_0', 2.635), ('loss_caption', 2.619), ('total_loss', 14.504)]), +time/iter = 0.158, bad_vid = 0.000 + +Validation results of iter 1333: +Bleu_1:0.16894357888730638 +Bleu_2:0.09902176620134434 +Bleu_3:0.05312286436412136 +Bleu_4:0.026212861867102137 +METEOR:0.0791142699299577 +ROUGE_L:0.15563765109454591 +CIDEr:0.4087091055845523 +Recall:0.1991554685892762 +Precision:0.40083793546594454 +soda_c:0.05642652494419026 +para_Bleu_1:0.28013834967939705 +para_Bleu_2:0.16393959632782257 +para_Bleu_3:0.09809744775628881 +para_Bleu_4:0.060378126412557326 +para_METEOR:0.1286956339033507 +para_ROUGE_L:0.29903071052996405 +para_CIDEr:0.14675303603221324 + +overall score of iter 1333: 0.3358267963481213 + +Save model at iter 1333 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 1333 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 1463 (epoch 1), +loss = OrderedDict([('loss_ce', 0.322), ('loss_counter', 0.128), ('loss_bbox', 0.026), ('loss_giou', 0.301), ('loss_self_iou', 0.011), ('cardinality_error', 7.699), ('loss_ce_0', 0.335), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.316), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.448), ('loss_caption', 2.462), ('total_loss', 13.729)]), +time/iter = 0.660, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1596 (epoch 1), +loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.126), ('loss_bbox', 0.022), ('loss_giou', 0.284), ('loss_self_iou', 0.01), ('cardinality_error', 8.233), ('loss_ce_0', 0.322), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.31), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.348), ('loss_caption', 2.348), ('total_loss', 13.16)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1729 (epoch 1), +loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.124), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 7.632), ('loss_ce_0', 0.32), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.307), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.363), ('loss_caption', 2.353), ('total_loss', 13.14)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1862 (epoch 1), +loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.12), ('loss_bbox', 0.023), ('loss_giou', 0.268), ('loss_self_iou', 0.01), ('cardinality_error', 7.609), ('loss_ce_0', 0.32), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.29), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.609), ('loss_caption_0', 2.439), ('loss_caption', 2.419), ('total_loss', 13.343)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1995 (epoch 1), +loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.281), ('loss_self_iou', 0.009), ('cardinality_error', 7.541), ('loss_ce_0', 0.322), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.309), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.541), ('loss_caption_0', 2.503), ('loss_caption', 2.503), ('total_loss', 13.766)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2128 (epoch 1), +loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.126), ('loss_bbox', 0.024), ('loss_giou', 0.284), ('loss_self_iou', 0.009), ('cardinality_error', 7.789), ('loss_ce_0', 0.324), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.301), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.789), ('loss_caption_0', 2.5), ('loss_caption', 2.493), ('total_loss', 13.73)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2261 (epoch 1), +loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.122), ('loss_bbox', 0.023), ('loss_giou', 0.285), ('loss_self_iou', 0.012), ('cardinality_error', 7.902), ('loss_ce_0', 0.316), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.304), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.425), ('loss_caption', 2.424), ('total_loss', 13.426)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2394 (epoch 1), +loss = OrderedDict([('loss_ce', 0.315), ('loss_counter', 
0.126), ('loss_bbox', 0.025), ('loss_giou', 0.29), ('loss_self_iou', 0.011), ('cardinality_error', 7.534), ('loss_ce_0', 0.323), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.308), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.439), ('loss_caption', 2.435), ('total_loss', 13.54)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2527 (epoch 1), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.125), ('loss_bbox', 0.023), ('loss_giou', 0.276), ('loss_self_iou', 0.009), ('cardinality_error', 7.647), ('loss_ce_0', 0.319), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.647), ('loss_caption_0', 2.454), ('loss_caption', 2.455), ('total_loss', 13.492)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2660 (epoch 1), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.131), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 8.0), ('loss_ce_0', 0.317), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.294), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.0), ('loss_caption_0', 2.464), ('loss_caption', 2.451), ('total_loss', 13.487)]), +time/iter = 0.167, bad_vid = 0.000 + +Validation results of iter 2666: +Bleu_1:0.18247710374533507 +Bleu_2:0.10433126216854799 +Bleu_3:0.05471515540980739 +Bleu_4:0.025315544998990337 +METEOR:0.08392673175891194 +ROUGE_L:0.16810710582244187 +CIDEr:0.48711946137609907 +Recall:0.23104975652842194 +Precision:0.4442690424090867 +soda_c:0.06454827356060923 +para_Bleu_1:0.27953804293947354 +para_Bleu_2:0.1635778619591909 +para_Bleu_3:0.09761782578266559 +para_Bleu_4:0.060085255296605154 +para_METEOR:0.13134445752685775 +para_ROUGE_L:0.3040652157082556 +para_CIDEr:0.15701615141849948 + +overall score of iter 2666: 0.34844586424196233 + +Save model at iter 2666 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 2666 to checkpoint file. 
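
The lr_history in the info.json above steps 5e-05 -> 2.5e-05 -> 1.25e-05 -> 6.25e-06 -> 3.125e-06, with the drops first logged at iters 10773, 14763, 18753 and 22743; at roughly 1333 iterations per epoch those are epochs 8, 11, 14 and 17, matching lr = 5e-05, learning_rate_decay_start = 8, learning_rate_decay_every = 3 and learning_rate_decay_rate = 0.5 in the args. A sketch of the implied step decay, reconstructed from the log rather than copied from the training script:

def lr_at_epoch(epoch, base_lr=5e-05, start=8, every=3, rate=0.5):
    # Constant before `start`, then halved once per `every` epochs.
    if epoch < start:
        return base_lr
    return base_lr * rate ** ((epoch - start) // every + 1)

for epoch in (0, 7, 8, 11, 14, 17):
    print(epoch, lr_at_epoch(epoch))
# 5e-05 up to epoch 7, then 2.5e-05, 1.25e-05, 6.25e-06, 3.125e-06
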
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 2793 (epoch 2), +loss = OrderedDict([('loss_ce', 0.309), ('loss_counter', 0.119), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.01), ('cardinality_error', 7.556), ('loss_ce_0', 0.312), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.556), ('loss_caption_0', 2.27), ('loss_caption', 2.276), ('total_loss', 12.632)]), +time/iter = 0.666, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2926 (epoch 2), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.121), ('loss_bbox', 0.023), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.444), ('loss_ce_0', 0.317), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.287), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.444), ('loss_caption_0', 2.276), ('loss_caption', 2.291), ('total_loss', 12.726)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3059 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.127), ('loss_bbox', 0.02), ('loss_giou', 0.272), ('loss_self_iou', 0.008), ('cardinality_error', 8.135), ('loss_ce_0', 0.302), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.135), ('loss_caption_0', 2.364), ('loss_caption', 2.364), ('total_loss', 13.057)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3192 (epoch 2), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.699), ('loss_ce_0', 0.306), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.286), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.367), ('loss_caption', 2.381), ('total_loss', 13.038)]), +time/iter = 0.177, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3325 (epoch 2), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.123), ('loss_bbox', 0.021), ('loss_giou', 0.274), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.3), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.291), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.323), ('loss_caption', 2.33), ('total_loss', 12.887)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3458 (epoch 2), +loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.277), ('loss_self_iou', 0.01), ('cardinality_error', 7.865), ('loss_ce_0', 0.31), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.295), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.351), ('loss_caption', 2.341), ('total_loss', 13.038)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3591 (epoch 2), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.114), ('loss_bbox', 0.022), ('loss_giou', 0.263), ('loss_self_iou', 0.009), ('cardinality_error', 7.586), ('loss_ce_0', 0.308), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.222), ('loss_caption', 2.223), ('total_loss', 12.425)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3724 (epoch 2), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 
0.123), ('loss_bbox', 0.023), ('loss_giou', 0.265), ('loss_self_iou', 0.009), ('cardinality_error', 7.624), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.624), ('loss_caption_0', 2.38), ('loss_caption', 2.368), ('total_loss', 13.014)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3857 (epoch 2), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.264), ('loss_self_iou', 0.009), ('cardinality_error', 7.489), ('loss_ce_0', 0.312), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.489), ('loss_caption_0', 2.343), ('loss_caption', 2.344), ('total_loss', 12.897)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3990 (epoch 2), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.134), ('loss_bbox', 0.02), ('loss_giou', 0.268), ('loss_self_iou', 0.012), ('cardinality_error', 8.301), ('loss_ce_0', 0.299), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.289), ('loss_self_iou_0', 0.013), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.327), ('loss_caption', 2.346), ('total_loss', 12.9)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 3999: +Bleu_1:0.18812761655735627 +Bleu_2:0.11394688266117041 +Bleu_3:0.06350983100569632 +Bleu_4:0.03295035253718016 +METEOR:0.08673497362280043 +ROUGE_L:0.17099683701262633 +CIDEr:0.534654554166069 +Recall:0.2545535313519452 +Precision:0.4357073390990242 +soda_c:0.06940030844072555 +para_Bleu_1:0.31911536052560924 +para_Bleu_2:0.19074275606485158 +para_Bleu_3:0.11503629156908896 +para_Bleu_4:0.07096292455051724 +para_METEOR:0.14141970569772275 +para_ROUGE_L:0.3133292457236414 +para_CIDEr:0.18756071216976763 + +overall score of iter 3999: 0.3999433424180076 + +Save model at iter 3999 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 3999 to checkpoint file. 
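
The "Validation results of iter N" blocks recur once per epoch with one Metric:value pair per line. A throwaway parser to tabulate them; the regexes assume exactly the format shown in this log, and the function name and path are ours:

import re

def parse_validation_blocks(log_text):
    parts = re.split(r"Validation results of iter (\d+):", log_text)[1:]
    # re.split with a capture group leaves alternating [iter, block, iter, block, ...]
    results = {}
    for it, block in zip(parts[0::2], parts[1::2]):
        # Keep only "Name:value" pairs written without spaces, i.e. the metric lines.
        results[int(it)] = {name: float(val)
                            for name, val in re.findall(r"([A-Za-z_]+\d?):([\d.]+)", block)}
    return results

with open("yc2_univl/train.log") as f:
    history = parse_validation_blocks(f.read())
print(history[1333]["CIDEr"])  # 0.4087... for the first validation round above
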
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 4123 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.129), ('loss_bbox', 0.021), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.925), ('loss_ce_0', 0.307), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 2.272), ('loss_caption', 2.28), ('total_loss', 12.579)]),
+time/iter = 0.678, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4256 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.121), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.632), ('loss_ce_0', 0.31), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.247), ('loss_caption', 2.252), ('total_loss', 12.484)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4389 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.12), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.011), ('cardinality_error', 7.526), ('loss_ce_0', 0.309), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.272), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.526), ('loss_caption_0', 2.194), ('loss_caption', 2.205), ('total_loss', 12.273)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4522 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.115), ('loss_bbox', 0.019), ('loss_giou', 0.248), ('loss_self_iou', 0.007), ('cardinality_error', 7.519), ('loss_ce_0', 0.303), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.335), ('loss_caption', 2.326), ('total_loss', 12.689)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4655 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.263), ('loss_self_iou', 0.008), ('cardinality_error', 7.97), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.97), ('loss_caption_0', 2.254), ('loss_caption', 2.267), ('total_loss', 12.545)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4788 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.253), ('loss_self_iou', 0.008), ('cardinality_error', 7.481), ('loss_ce_0', 0.308), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.481), ('loss_caption_0', 2.208), ('loss_caption', 2.195), ('total_loss', 12.24)]),
+time/iter = 0.151, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4921 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.262), ('loss_self_iou', 0.01), ('cardinality_error', 7.842), ('loss_ce_0', 0.305), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.284), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.842), ('loss_caption_0', 2.186), ('loss_caption', 2.196), ('total_loss', 12.289)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5054 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.121), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.009), ('cardinality_error', 7.887), ('loss_ce_0', 0.305), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.887), ('loss_caption_0', 2.242), ('loss_caption', 2.239), ('total_loss', 12.422)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5187 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.262), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.305), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.277), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.25), ('loss_caption', 2.246), ('total_loss', 12.483)]),
+time/iter = 0.166, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5320 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.12), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.729), ('loss_caption_0', 2.287), ('loss_caption', 2.298), ('total_loss', 12.64)]),
+time/iter = 0.161, bad_vid = 0.000
+
+Validation results of iter 5332:
+Bleu_1:0.19536023703614988
+Bleu_2:0.11676341716851109
+Bleu_3:0.06337153157323498
+Bleu_4:0.031788948303475714
+METEOR:0.09287502887069582
+ROUGE_L:0.18168372139225142
+CIDEr:0.5345089450528974
+Recall:0.26186565000159123
+Precision:0.4578470702650138
+soda_c:0.06891495599002981
+para_Bleu_1:0.3645537642333956
+para_Bleu_2:0.21504928179111618
+para_Bleu_3:0.1297486406737134
+para_Bleu_4:0.08010111193897063
+para_METEOR:0.1518569517959942
+para_ROUGE_L:0.3241825281759821
+para_CIDEr:0.22211083978975357
+
+overall score of iter 5332: 0.4540689035247184
+
+Save model at iter 5332 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 5332 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 5453 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.022), ('loss_giou', 0.25), ('loss_self_iou', 0.011), ('cardinality_error', 7.519), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.175), ('loss_caption', 2.176), ('total_loss', 12.088)]),
+time/iter = 0.716, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5586 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.12), ('loss_bbox', 0.018), ('loss_giou', 0.252), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.292), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.274), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 2.16), ('loss_caption', 2.132), ('total_loss', 11.979)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5719 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.13), ('loss_bbox', 0.02), ('loss_giou', 0.255), ('loss_self_iou', 0.008), ('cardinality_error', 8.451), ('loss_ce_0', 0.302), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.273), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.451), ('loss_caption_0', 2.166), ('loss_caption', 2.164), ('total_loss', 12.113)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5852 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.246), ('loss_self_iou', 0.007), ('cardinality_error', 7.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.267), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.122), ('loss_caption', 2.111), ('total_loss', 11.841)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5985 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.243), ('loss_self_iou', 0.009), ('cardinality_error', 7.474), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.263), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.474), ('loss_caption_0', 2.149), ('loss_caption', 2.14), ('total_loss', 11.926)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6118 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.113), ('loss_bbox', 0.018), ('loss_giou', 0.241), ('loss_self_iou', 0.008), ('cardinality_error', 7.639), ('loss_ce_0', 0.302), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.259), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.639), ('loss_caption_0', 2.235), ('loss_caption', 2.215), ('total_loss', 12.218)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6251 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.125), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 7.857), ('loss_ce_0', 0.301), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.857), ('loss_caption_0', 2.235), ('loss_caption', 2.226), ('total_loss', 12.328)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6384 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.02), ('loss_giou', 0.246), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.301), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.208), ('loss_caption', 2.183), ('total_loss', 12.157)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6517 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.12), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.872), ('loss_ce_0', 0.295), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.872), ('loss_caption_0', 2.135), ('loss_caption', 2.155), ('total_loss', 11.99)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6650 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.112), ('loss_bbox', 0.021), ('loss_giou', 0.244), ('loss_self_iou', 0.008), ('cardinality_error', 7.398), ('loss_ce_0', 0.297), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.26), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.398), ('loss_caption_0', 2.205), ('loss_caption', 2.202), ('total_loss', 12.127)]),
+time/iter = 0.156, bad_vid = 0.000
+
+Validation results of iter 6665:
+Bleu_1:0.19366491706119263
+Bleu_2:0.1161802397372496
+Bleu_3:0.06381908710297783
+Bleu_4:0.0310996008751752
+METEOR:0.0900086447067842
+ROUGE_L:0.1772625018945245
+CIDEr:0.5329339889166991
+Recall:0.27822837264850414
+Precision:0.4414053002674447
+soda_c:0.0725148309247326
+para_Bleu_1:0.36779729697992286
+para_Bleu_2:0.2189609464261768
+para_Bleu_3:0.13170237886801614
+para_Bleu_4:0.08102932652379062
+para_METEOR:0.15287168689015676
+para_ROUGE_L:0.32609559286330886
+para_CIDEr:0.24981796796266917
+
+overall score of iter 6665: 0.48371898137661656
+
+Save model at iter 6665 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 6665 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 6783 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.117), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.586), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.02), ('loss_caption', 2.014), ('total_loss', 11.332)]),
+time/iter = 0.689, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6916 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.249), ('loss_self_iou', 0.008), ('cardinality_error', 7.519), ('loss_ce_0', 0.302), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.264), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.118), ('loss_caption', 2.101), ('total_loss', 11.817)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7049 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.25), ('loss_self_iou', 0.007), ('cardinality_error', 7.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.105), ('loss_caption', 2.111), ('total_loss', 11.78)]),
+time/iter = 0.172, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7182 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.242), ('loss_self_iou', 0.008), ('cardinality_error', 7.594), ('loss_ce_0', 0.288), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.594), ('loss_caption_0', 2.194), ('loss_caption', 2.195), ('total_loss', 12.045)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7315 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.123), ('loss_bbox', 0.02), ('loss_giou', 0.254), ('loss_self_iou', 0.009), ('cardinality_error', 8.301), ('loss_ce_0', 0.291), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.096), ('loss_caption', 2.09), ('total_loss', 11.741)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7448 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.234), ('loss_self_iou', 0.006), ('cardinality_error', 7.677), ('loss_ce_0', 0.292), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.251), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.076), ('loss_caption', 2.063), ('total_loss', 11.513)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7581 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.116), ('loss_bbox', 0.019), ('loss_giou', 0.238), ('loss_self_iou', 0.008), ('cardinality_error', 7.534), ('loss_ce_0', 0.295), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.114), ('loss_caption', 2.112), ('total_loss', 11.718)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7714 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.677), ('loss_ce_0', 0.291), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.167), ('loss_caption', 2.179), ('total_loss', 11.932)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7847 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.252), ('loss_self_iou', 0.009), ('cardinality_error', 8.053), ('loss_ce_0', 0.289), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.053), ('loss_caption_0', 2.106), ('loss_caption', 2.115), ('total_loss', 11.804)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7980 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.249), ('loss_self_iou', 0.007), ('cardinality_error', 7.902), ('loss_ce_0', 0.295), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.151), ('loss_caption', 2.153), ('total_loss', 11.979)]),
+time/iter = 0.158, bad_vid = 0.000
+
+Validation results of iter 7998:
+Bleu_1:0.19874944106127662
+Bleu_2:0.12266046915797622
+Bleu_3:0.07150852984916518
+Bleu_4:0.036185181004552064
+METEOR:0.09274687098087099
+ROUGE_L:0.18413336093424784
+CIDEr:0.5727051685734265
+Recall:0.259037909270404
+Precision:0.451289465457956
+soda_c:0.07263494732248185
+para_Bleu_1:0.32307562783294125
+para_Bleu_2:0.1944214796418441
+para_Bleu_3:0.11901149393254483
+para_Bleu_4:0.07454555120453704
+para_METEOR:0.14324209261218024
+para_ROUGE_L:0.31918573126228
+para_CIDEr:0.23096832321460165
+
+overall score of iter 7998: 0.4487559670313189
+
+Save model at iter 7998 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 8113 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.236), ('loss_self_iou', 0.008), ('cardinality_error', 7.617), ('loss_ce_0', 0.295), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.036), ('loss_caption', 2.044), ('total_loss', 11.427)]),
+time/iter = 0.677, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8246 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.237), ('loss_self_iou', 0.006), ('cardinality_error', 7.827), ('loss_ce_0', 0.283), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 2.055), ('loss_caption', 2.057), ('total_loss', 11.458)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8379 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.118), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.286), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.046), ('loss_caption', 2.041), ('total_loss', 11.331)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8512 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.228), ('loss_self_iou', 0.006), ('cardinality_error', 7.654), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.245), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.654), ('loss_caption_0', 1.991), ('loss_caption', 1.997), ('total_loss', 11.118)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8645 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 8.068), ('loss_ce_0', 0.287), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.068), ('loss_caption_0', 2.094), ('loss_caption', 2.097), ('total_loss', 11.714)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8778 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.121), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.008), ('cardinality_error', 8.008), ('loss_ce_0', 0.286), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.258), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.008), ('loss_caption_0', 2.092), ('loss_caption', 2.092), ('total_loss', 11.63)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8911 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.338), ('loss_ce_0', 0.297), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.248), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.338), ('loss_caption_0', 2.051), ('loss_caption', 2.054), ('total_loss', 11.446)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9044 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.105), ('loss_bbox', 0.02), ('loss_giou', 0.227), ('loss_self_iou', 0.008), ('cardinality_error', 7.226), ('loss_ce_0', 0.292), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.226), ('loss_caption_0', 2.08), ('loss_caption', 2.084), ('total_loss', 11.478)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9177 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.254), ('loss_self_iou', 0.007), ('cardinality_error', 7.977), ('loss_ce_0', 0.288), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.977), ('loss_caption_0', 2.046), ('loss_caption', 2.031), ('total_loss', 11.546)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9310 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.236), ('loss_self_iou', 0.006), ('cardinality_error', 7.97), ('loss_ce_0', 0.281), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.986), ('loss_caption', 1.995), ('total_loss', 11.157)]),
+time/iter = 0.152, bad_vid = 0.000
+
+Validation results of iter 9331:
+Bleu_1:0.2003309018825777
+Bleu_2:0.1225756065112458
+Bleu_3:0.06724461390362559
+Bleu_4:0.033684328156599955
+METEOR:0.0938288297360794
+ROUGE_L:0.1832565856913202
+CIDEr:0.5805494889367487
+Recall:0.28578288505804933
+Precision:0.4570872842207636
+soda_c:0.07457933387713374
+para_Bleu_1:0.3713316702717572
+para_Bleu_2:0.22391267992808692
+para_Bleu_3:0.1360620228892395
+para_Bleu_4:0.08475146307949002
+para_METEOR:0.15553928732702577
+para_ROUGE_L:0.3279787647771023
+para_CIDEr:0.24807495620487915
+
+overall score of iter 9331: 0.4883657066113949
+
+Save model at iter 9331 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 9331 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 9443 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.226), ('loss_self_iou', 0.006), ('cardinality_error', 7.617), ('loss_ce_0', 0.292), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.065), ('loss_caption', 2.061), ('total_loss', 11.394)]),
+time/iter = 0.717, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9576 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.02), ('loss_giou', 0.231), ('loss_self_iou', 0.006), ('cardinality_error', 7.917), ('loss_ce_0', 0.284), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.917), ('loss_caption_0', 1.977), ('loss_caption', 1.974), ('total_loss', 11.093)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9709 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.117), ('loss_bbox', 0.016), ('loss_giou', 0.224), ('loss_self_iou', 0.006), ('cardinality_error', 8.098), ('loss_ce_0', 0.29), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.098), ('loss_caption_0', 2.051), ('loss_caption', 2.063), ('total_loss', 11.373)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9842 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.242), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.286), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.939), ('loss_caption', 1.953), ('total_loss', 11.058)]),
+time/iter = 0.169, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9975 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.116), ('loss_bbox', 0.017), ('loss_giou', 0.238), ('loss_self_iou', 0.006), ('cardinality_error', 8.233), ('loss_ce_0', 0.281), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.255), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.024), ('loss_caption', 2.026), ('total_loss', 11.31)]),
+time/iter = 0.167, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10108 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.018), ('loss_giou', 0.232), ('loss_self_iou', 0.006), ('cardinality_error', 7.466), ('loss_ce_0', 0.279), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.466), ('loss_caption_0', 1.878), ('loss_caption', 1.882), ('total_loss', 10.667)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10241 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.018), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.722), ('loss_ce_0', 0.282), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.984), ('loss_caption', 1.988), ('total_loss', 11.165)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10374 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.225), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.285), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 2.089), ('loss_caption', 2.094), ('total_loss', 11.498)]),
+time/iter = 0.164, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10507 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.113), ('loss_bbox', 0.019), ('loss_giou', 0.22), ('loss_self_iou', 0.007), ('cardinality_error', 7.564), ('loss_ce_0', 0.283), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.564), ('loss_caption_0', 1.936), ('loss_caption', 1.935), ('total_loss', 10.84)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10640 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.232), ('loss_self_iou', 0.008), ('cardinality_error', 7.549), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.249), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.549), ('loss_caption_0', 2.041), ('loss_caption', 2.042), ('total_loss', 11.323)]),
+time/iter = 0.178, bad_vid = 0.000
+
+Validation results of iter 10664:
+Bleu_1:0.19584871429233122
+Bleu_2:0.1203954133477019
+Bleu_3:0.06765236989260215
+Bleu_4:0.03515047236439923
+METEOR:0.09347581038898298
+ROUGE_L:0.18336361365161372
+CIDEr:0.5642570328531701
+Recall:0.287053410514844
+Precision:0.4506790316418327
+soda_c:0.07315525040409161
+para_Bleu_1:0.39595219023577966
+para_Bleu_2:0.23717913606151478
+para_Bleu_3:0.14480681642134902
+para_Bleu_4:0.0901695364250172
+para_METEOR:0.16127903027678414
+para_ROUGE_L:0.3324403291093838
+para_CIDEr:0.23804687234043756
+
+overall score of iter 10664: 0.48949543904223886
+
+Save model at iter 10664 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 10664 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 10773 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.114), ('loss_bbox', 0.017), ('loss_giou', 0.235), ('loss_self_iou', 0.006), ('cardinality_error', 7.94), ('loss_ce_0', 0.278), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.851), ('loss_caption', 1.84), ('total_loss', 10.561)]),
+time/iter = 0.724, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10906 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.109), ('loss_bbox', 0.017), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.218), ('loss_ce_0', 0.278), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.218), ('loss_caption_0', 1.945), ('loss_caption', 1.948), ('total_loss', 10.791)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11039 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.108), ('loss_bbox', 0.017), ('loss_giou', 0.207), ('loss_self_iou', 0.006), ('cardinality_error', 7.579), ('loss_ce_0', 0.283), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.579), ('loss_caption_0', 1.92), ('loss_caption', 1.927), ('total_loss', 10.664)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11172 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.451), ('loss_ce_0', 0.279), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.451), ('loss_caption_0', 1.91), ('loss_caption', 1.9), ('total_loss', 10.635)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11305 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.125), ('loss_bbox', 0.017), ('loss_giou', 0.233), ('loss_self_iou', 0.006), ('cardinality_error', 8.09), ('loss_ce_0', 0.276), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.244), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.876), ('loss_caption', 1.877), ('total_loss', 10.648)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11438 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.113), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.269), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.981), ('loss_caption', 1.968), ('total_loss', 10.865)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11571 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.006), ('cardinality_error', 7.699), ('loss_ce_0', 0.277), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.833), ('loss_caption', 1.846), ('total_loss', 10.461)]),
+time/iter = 0.149, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11704 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.21), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.91), ('loss_caption', 1.915), ('total_loss', 10.628)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11837 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.215), ('loss_self_iou', 0.007), ('cardinality_error', 8.0), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.0), ('loss_caption_0', 1.936), ('loss_caption', 1.939), ('total_loss', 10.726)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11970 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.22), ('loss_self_iou', 0.006), ('cardinality_error', 8.158), ('loss_ce_0', 0.27), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.158), ('loss_caption_0', 1.953), ('loss_caption', 1.962), ('total_loss', 10.881)]),
+time/iter = 0.168, bad_vid = 0.000
+
+Validation results of iter 11997:
+Bleu_1:0.19696025394358163
+Bleu_2:0.12042554867022627
+Bleu_3:0.06805715701089529
+Bleu_4:0.034063345644385214
+METEOR:0.09208296372249718
+ROUGE_L:0.1803782633150628
+CIDEr:0.5812603125344058
+Recall:0.29169024735901117
+Precision:0.44299129936438486
+soda_c:0.07606608300691252
+para_Bleu_1:0.383549187276652
+para_Bleu_2:0.23192713278728125
+para_Bleu_3:0.14217181061136971
+para_Bleu_4:0.0892715976218228
+para_METEOR:0.16074434603101373
+para_ROUGE_L:0.3336567463040183
+para_CIDEr:0.2859809872200661
+
+overall score of iter 11997: 0.5359969308729027
+
+Save model at iter 11997 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 11997 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 12103 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.216), ('loss_self_iou', 0.006), ('cardinality_error', 8.038), ('loss_ce_0', 0.274), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.038), ('loss_caption_0', 1.832), ('loss_caption', 1.845), ('total_loss', 10.35)]),
+time/iter = 0.705, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12236 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.206), ('loss_self_iou', 0.005), ('cardinality_error', 7.812), ('loss_ce_0', 0.266), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.968), ('loss_caption', 1.959), ('total_loss', 10.757)]),
+time/iter = 0.166, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12369 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.118), ('loss_bbox', 0.016), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.27), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.89), ('loss_caption', 1.903), ('total_loss', 10.534)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12502 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.108), ('loss_bbox', 0.016), ('loss_giou', 0.205), ('loss_self_iou', 0.006), ('cardinality_error', 7.684), ('loss_ce_0', 0.268), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.684), ('loss_caption_0', 1.903), ('loss_caption', 1.905), ('total_loss', 10.519)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12635 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.218), ('loss_self_iou', 0.005), ('cardinality_error', 7.947), ('loss_ce_0', 0.269), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.232), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.947), ('loss_caption_0', 1.822), ('loss_caption', 1.826), ('total_loss', 10.284)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12768 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.111), ('loss_bbox', 0.017), ('loss_giou', 0.219), ('loss_self_iou', 0.008), ('cardinality_error', 7.669), ('loss_ce_0', 0.276), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.905), ('loss_caption', 1.909), ('total_loss', 10.662)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12901 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.639), ('loss_ce_0', 0.267), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.639), ('loss_caption_0', 1.856), ('loss_caption', 1.863), ('total_loss', 10.344)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13034 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.274), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.841), ('loss_caption', 1.841), ('total_loss', 10.356)]),
+time/iter = 0.149, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13167 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.018), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.406), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.406), ('loss_caption_0', 1.931), ('loss_caption', 1.927), ('total_loss', 10.663)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13300 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.274), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.005), ('cardinality_error', 7.737), ('loss_ce_0', 0.272), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.853), ('loss_caption', 1.849), ('total_loss', 10.379)]),
+time/iter = 0.154, bad_vid = 0.000
+
+Validation results of iter 13330:
+Bleu_1:0.20446290018298774
+Bleu_2:0.12418412895577716
+Bleu_3:0.06899010124646034
+Bleu_4:0.03428116460131532
+METEOR:0.09595521703655657
+ROUGE_L:0.1876517650928566
+CIDEr:0.5887832993219201
+Recall:0.3017153873964599
+Precision:0.4588439095550697
+soda_c:0.07875391677883807
+para_Bleu_1:0.3953706124668704
+para_Bleu_2:0.24043007714841402
+para_Bleu_3:0.14833197751929023
+para_Bleu_4:0.09386644902900565
+para_METEOR:0.16476396966168239
+para_ROUGE_L:0.33760319454244797
+para_CIDEr:0.31194480042956774
+
+overall score of iter 13330: 0.5705752191202558
+
+Save model at iter 13330 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 13330 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 13433 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.112), ('loss_bbox', 0.017), ('loss_giou', 0.217), ('loss_self_iou', 0.006), ('cardinality_error', 7.835), ('loss_ce_0', 0.267), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.835), ('loss_caption_0', 1.804), ('loss_caption', 1.811), ('total_loss', 10.223)]),
+time/iter = 0.700, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13566 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.116), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.005), ('cardinality_error', 7.774), ('loss_ce_0', 0.266), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.221), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.884), ('loss_caption', 1.887), ('total_loss', 10.42)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13699 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.201), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.823), ('loss_caption', 1.806), ('total_loss', 10.083)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13832 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.699), ('loss_ce_0', 0.271), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.855), ('loss_caption', 1.857), ('total_loss', 10.374)]),
+time/iter = 0.164, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13965 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.105), ('loss_bbox', 0.016), ('loss_giou', 0.196), ('loss_self_iou', 0.006), ('cardinality_error', 7.128), ('loss_ce_0', 0.271), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.128), ('loss_caption_0', 1.809), ('loss_caption', 1.8), ('total_loss', 10.055)]),
+time/iter = 0.151, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14098 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.016), ('loss_giou', 0.213), ('loss_self_iou', 0.007), ('cardinality_error', 7.925), ('loss_ce_0', 0.273), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.863), ('loss_caption', 1.863), ('total_loss', 10.433)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14231 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.007), ('cardinality_error', 7.82), ('loss_ce_0', 0.262), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.222), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.936), ('loss_caption', 1.929), ('total_loss', 10.624)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14364 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.263), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.227), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.757), ('loss_caption', 1.754), ('total_loss', 9.948)]),
+time/iter = 0.185, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14497 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.11), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.265), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.896), ('loss_caption', 1.894), ('total_loss', 10.407)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14630 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.925), ('loss_ce_0', 0.261), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.84), ('loss_caption', 1.842), ('total_loss', 10.253)]),
+time/iter = 0.158, bad_vid = 0.000
+
+Validation results of iter 14663:
+Bleu_1:0.19267153393038786
+Bleu_2:0.11732781330402656
+Bleu_3:0.06746115616325608
+Bleu_4:0.03425583839334337
+METEOR:0.08963300348041837
+ROUGE_L:0.17480207136309905
+CIDEr:0.575137603362526
+Recall:0.30432682743951917
+Precision:0.4353044354138446
+soda_c:0.07762847290423684
+para_Bleu_1:0.393384019586376
+para_Bleu_2:0.23835405770332685
+para_Bleu_3:0.14545808678454117
+para_Bleu_4:0.09085202435904723
+para_METEOR:0.16354570345255123
+para_ROUGE_L:0.3343729651839732
+para_CIDEr:0.27098453497923136
+
+overall score of iter 14663: 0.5253822627908299
+
+Save model at iter 14663 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 14763 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.006), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.225), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.87), ('loss_caption', 1.877), ('total_loss', 10.398)]),
+time/iter = 0.690, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14896 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.796), ('loss_caption', 1.784), ('total_loss', 9.979)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15029 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.006), ('cardinality_error', 7.414), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.414), ('loss_caption_0', 1.763), ('loss_caption', 1.767), ('total_loss', 9.842)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15162 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.262), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.781), ('loss_caption', 1.781), ('total_loss', 9.916)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15295 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.662), ('loss_ce_0', 0.255), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.735), ('loss_caption', 1.75), ('total_loss', 9.755)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15428 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.114), ('loss_bbox', 0.015), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.992), ('loss_ce_0', 0.261), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.852), ('loss_caption', 1.86), ('total_loss', 10.298)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15561 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.006), ('cardinality_error', 8.068), ('loss_ce_0', 0.257), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.068), ('loss_caption_0', 1.878), ('loss_caption', 1.866), ('total_loss', 10.314)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15694 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.202), ('loss_self_iou', 0.004), ('cardinality_error', 7.647), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.647), ('loss_caption_0', 1.7), ('loss_caption', 1.684), ('total_loss', 9.569)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15827 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.104), ('loss_bbox', 0.016), ('loss_giou', 0.194), ('loss_self_iou', 0.005), ('cardinality_error', 7.722), ('loss_ce_0', 0.257), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.848), ('loss_caption', 1.839), ('total_loss', 10.119)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15960 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.107), ('loss_bbox', 0.015), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 7.609), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.609), ('loss_caption_0', 1.847), ('loss_caption', 1.858), ('total_loss', 10.198)]),
+time/iter = 0.161, bad_vid = 0.000
+
+Validation results of iter 15996:
+Bleu_1:0.1989422607268001
+Bleu_2:0.12223038556953512
+Bleu_3:0.06835990671747892
+Bleu_4:0.03486159828438583
+METEOR:0.09408978838449876
+ROUGE_L:0.18200142867223945
+CIDEr:0.593480700759431
+Recall:0.30795469953703025
+Precision:0.4513424333993264
+soda_c:0.0796861065455984
+para_Bleu_1:0.39594509057043764
+para_Bleu_2:0.24087109399513515
+para_Bleu_3:0.14790262814870953
+para_Bleu_4:0.09321042711819619
+para_METEOR:0.1655617051143519
+para_ROUGE_L:0.3391051008488012
+para_CIDEr:0.32807196750555834
+
+overall score of iter 15996: 0.5868440997381064
+
+Save model at iter 15996 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 15996 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 16093 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.258), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.773), ('loss_caption', 1.769), ('total_loss', 9.789)]),
+time/iter = 0.727, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16226 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.004), ('cardinality_error', 7.805), ('loss_ce_0', 0.259), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.805), ('loss_caption_0', 1.743), ('loss_caption', 1.749), ('total_loss', 9.786)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16359 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.116), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.797), ('loss_caption', 1.778), ('total_loss', 9.972)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16492 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.105), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.383), ('loss_ce_0', 0.257), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.383), ('loss_caption_0', 1.796), ('loss_caption', 1.808), ('total_loss', 9.899)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16625 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.005), ('cardinality_error', 7.782), ('loss_ce_0', 0.256), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.78), ('loss_caption', 1.779), ('total_loss', 9.812)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16758 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.962), ('loss_ce_0', 0.252), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.962), ('loss_caption_0', 1.795), ('loss_caption', 1.806), ('total_loss', 9.948)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16891 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.016), ('loss_giou', 0.199), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.255), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.788), ('loss_caption', 1.782), ('total_loss', 9.914)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17024 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.511), ('loss_ce_0', 0.26), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.511), ('loss_caption_0', 1.717), ('loss_caption', 1.72), ('total_loss', 9.666)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17157 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.252), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.738), ('loss_caption', 1.749), ('total_loss', 9.638)]),
+time/iter = 0.182, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17290 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.932), ('loss_ce_0', 0.254), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.932), ('loss_caption_0', 1.815), ('loss_caption', 1.83), ('total_loss', 10.067)]),
+time/iter = 0.163, bad_vid = 0.000
+
+Validation results of iter 17329:
+Bleu_1:0.19294534256446427
+Bleu_2:0.11789730285267924
+Bleu_3:0.06601509377472357
+Bleu_4:0.03274421971508606
+METEOR:0.0906445074413136
+ROUGE_L:0.17678145420382357
+CIDEr:0.5750907875125135
+Recall:0.3073352674556176
+Precision:0.4434536834427428
+soda_c:0.07896521325127955
+para_Bleu_1:0.39483511792471604
+para_Bleu_2:0.23988438429479647
+para_Bleu_3:0.1464330354033768
+para_Bleu_4:0.09122283851671699
+para_METEOR:0.16480200992253577
+para_ROUGE_L:0.33317486176302236
+para_CIDEr:0.29080350784714515
+
+overall score of iter 17329: 0.5468283562863979
+
+Save model at iter 17329 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 17423 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.865), ('loss_caption', 1.881), ('total_loss', 10.261)]),
+time/iter = 0.713, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17556 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.193), ('loss_self_iou', 0.004), ('cardinality_error', 7.737), ('loss_ce_0', 0.253), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.744), ('loss_caption', 1.743), ('total_loss', 9.707)]),
+time/iter = 0.168, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17689 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.199), ('loss_self_iou', 0.006), ('cardinality_error', 7.602), ('loss_ce_0', 0.262), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.602), ('loss_caption_0', 1.835), ('loss_caption', 1.819), ('total_loss', 10.1)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17822 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.191), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.249), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.681), ('loss_caption', 1.67), ('total_loss', 9.397)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17955 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.184), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.252), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.757), ('loss_caption', 1.745), ('total_loss', 9.658)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18088 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.534), ('loss_ce_0', 0.251), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.534), ('loss_caption_0', 1.703), ('loss_caption', 1.701), ('total_loss', 9.39)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18221 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.201), ('loss_self_iou', 0.005), ('cardinality_error', 8.211), ('loss_ce_0', 0.252), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.211), ('loss_caption_0', 1.824), ('loss_caption', 1.816), ('total_loss', 10.053)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18354 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.249), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.792), ('loss_caption', 1.779), ('total_loss', 9.874)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18487 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.251), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.826), ('loss_caption', 1.81), ('total_loss', 9.979)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18620 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.003), ('cardinality_error', 7.737), ('loss_ce_0', 0.251), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.767), ('loss_caption', 1.771), ('total_loss', 9.784)]),
+time/iter = 0.153, bad_vid = 0.000
+
+Validation results of iter 18662:
+Bleu_1:0.1916652028982354
+Bleu_2:0.11864819375256218
+Bleu_3:0.06801290454817709
+Bleu_4:0.03421778123301331
+METEOR:0.08890100804282676
+ROUGE_L:0.17229926562968575
+CIDEr:0.5719694906113042
+Recall:0.3115151404333572
+Precision:0.42734448265082836
+soda_c:0.07979305036983636
+para_Bleu_1:0.3972508455506424
+para_Bleu_2:0.24317507500304622
+para_Bleu_3:0.1497047997976745
+para_Bleu_4:0.09437727320664267
+para_METEOR:0.16651343432042678
+para_ROUGE_L:0.33875534436877147
+para_CIDEr:0.29220356232363026
+
+overall score of iter 18662: 0.5530942698506998
+
+Save model at iter 18662 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 18753 (epoch 14), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 8.241), ('loss_ce_0', 0.251), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.241), ('loss_caption_0', 1.758), ('loss_caption', 1.759), ('total_loss', 9.756)]), +time/iter = 0.731, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18886 (epoch 14), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.182), ('loss_self_iou', 0.004), ('cardinality_error', 7.436), ('loss_ce_0', 0.245), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.436), ('loss_caption_0', 1.696), ('loss_caption', 1.692), ('total_loss', 9.366)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19019 (epoch 14), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 7.692), ('loss_ce_0', 0.242), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.73), ('loss_caption', 1.729), ('total_loss', 9.496)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19152 (epoch 14), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.112), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.251), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.779), ('loss_caption', 1.771), ('total_loss', 9.714)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19285 (epoch 14), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.014), ('loss_giou', 0.194), ('loss_self_iou', 0.004), ('cardinality_error', 7.669), ('loss_ce_0', 0.25), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.76), ('loss_caption', 1.772), ('total_loss', 9.759)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19418 (epoch 14), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 8.256), ('loss_ce_0', 0.245), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.256), ('loss_caption_0', 1.754), ('loss_caption', 1.758), ('total_loss', 9.747)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19551 (epoch 14), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.253), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.68), ('loss_caption', 1.689), ('total_loss', 9.3)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19684 (epoch 14), +loss = OrderedDict([('loss_ce', 0.263), 
('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.005), ('cardinality_error', 7.474), ('loss_ce_0', 0.262), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.474), ('loss_caption_0', 1.81), ('loss_caption', 1.803), ('total_loss', 9.923)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19817 (epoch 14), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.247), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.769), ('loss_caption', 1.765), ('total_loss', 9.677)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19950 (epoch 14), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.253), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.736), ('loss_caption', 1.748), ('total_loss', 9.654)]), +time/iter = 0.153, bad_vid = 0.000 + +Validation results of iter 19995: +Bleu_1:0.19012877786294885 +Bleu_2:0.11743680046097797 +Bleu_3:0.06623934110461578 +Bleu_4:0.03314975306654321 +METEOR:0.08857227272587216 +ROUGE_L:0.17208518718096077 +CIDEr:0.5689998070546577 +Recall:0.3090681299310951 +Precision:0.43095498593310433 +soda_c:0.08081534748318767 +para_Bleu_1:0.3949292262433903 +para_Bleu_2:0.24183495416706074 +para_Bleu_3:0.1493168425692173 +para_Bleu_4:0.0941904023418332 +para_METEOR:0.16661877157717606 +para_ROUGE_L:0.3391544295873436 +para_CIDEr:0.3057631644012313 + +overall score of iter 19995: 0.5665723383202406 + +Save model at iter 19995 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 20083 (epoch 15), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.519), ('loss_ce_0', 0.257), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.519), ('loss_caption_0', 1.743), ('loss_caption', 1.756), ('total_loss', 9.655)]), +time/iter = 0.703, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20216 (epoch 15), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.759), ('loss_ce_0', 0.244), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.759), ('loss_caption_0', 1.79), ('loss_caption', 1.781), ('total_loss', 9.713)]), +time/iter = 0.168, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20349 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.245), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.749), ('loss_caption', 1.759), ('total_loss', 9.675)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20482 (epoch 15), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.005), ('cardinality_error', 7.94), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.694), ('loss_caption', 1.715), ('total_loss', 9.502)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20615 (epoch 15), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.005), ('cardinality_error', 7.368), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.368), ('loss_caption_0', 1.77), ('loss_caption', 1.771), ('total_loss', 9.775)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20748 (epoch 15), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.857), ('loss_ce_0', 0.247), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.857), ('loss_caption_0', 1.786), ('loss_caption', 1.773), ('total_loss', 9.695)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20881 (epoch 15), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.103), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.594), ('loss_ce_0', 0.242), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.594), ('loss_caption_0', 1.746), ('loss_caption', 1.748), ('total_loss', 9.541)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21014 (epoch 15), +loss = OrderedDict([('loss_ce', 0.249), 
('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 8.09), ('loss_ce_0', 0.249), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.709), ('loss_caption', 1.698), ('total_loss', 9.49)]), +time/iter = 0.149, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21147 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.115), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.248), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.733), ('loss_caption', 1.732), ('total_loss', 9.57)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21280 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.632), ('loss_ce_0', 0.245), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.632), ('loss_caption_0', 1.646), ('loss_caption', 1.658), ('total_loss', 9.233)]), +time/iter = 0.152, bad_vid = 0.000 + +Validation results of iter 21328: +Bleu_1:0.1927355202990476 +Bleu_2:0.11755729236198051 +Bleu_3:0.06532950485231373 +Bleu_4:0.0318670348131602 +METEOR:0.08966953019840175 +ROUGE_L:0.17549405824640266 +CIDEr:0.5708533801009449 +Recall:0.31055728552993345 +Precision:0.4412863394810881 +soda_c:0.08079399116249976 +para_Bleu_1:0.3847850395827542 +para_Bleu_2:0.23591168028694995 +para_Bleu_3:0.14500000021146267 +para_Bleu_4:0.09097906463153684 +para_METEOR:0.1633729521776342 +para_ROUGE_L:0.33764324525807 +para_CIDEr:0.3225522700715415 + +overall score of iter 21328: 0.5769042868807126 + +Save model at iter 21328 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 21413 (epoch 16), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.541), ('loss_ce_0', 0.239), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.637), ('loss_caption', 1.633), ('total_loss', 9.069)]), +time/iter = 0.698, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21546 (epoch 16), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.172), ('loss_self_iou', 0.004), ('cardinality_error', 7.624), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.624), ('loss_caption_0', 1.773), ('loss_caption', 1.784), ('total_loss', 9.621)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21679 (epoch 16), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.238), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.809), ('loss_caption', 1.805), ('total_loss', 9.791)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21812 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.677), ('loss_ce_0', 0.25), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.677), ('loss_caption_0', 1.674), ('loss_caption', 1.676), ('total_loss', 9.277)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21945 (epoch 16), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.192), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.244), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.713), ('loss_caption', 1.714), ('total_loss', 9.531)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22078 (epoch 16), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.707), ('loss_ce_0', 0.247), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.772), ('loss_caption', 1.758), ('total_loss', 9.738)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22211 (epoch 16), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.101), ('loss_bbox', 0.013), ('loss_giou', 0.18), ('loss_self_iou', 0.005), ('cardinality_error', 7.541), ('loss_ce_0', 0.249), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.665), ('loss_caption', 1.66), ('total_loss', 9.243)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22344 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), 
('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 8.008), ('loss_ce_0', 0.248), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.008), ('loss_caption_0', 1.799), ('loss_caption', 1.784), ('total_loss', 9.823)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22477 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.184), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.247), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.722), ('loss_caption', 1.733), ('total_loss', 9.525)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22610 (epoch 16), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 7.729), ('loss_ce_0', 0.245), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.664), ('loss_caption', 1.667), ('total_loss', 9.297)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 22661: +Bleu_1:0.1905629005997804 +Bleu_2:0.11689699082903934 +Bleu_3:0.06544029555928756 +Bleu_4:0.03330988693345351 +METEOR:0.08938496175202132 +ROUGE_L:0.17298359351524648 +CIDEr:0.5732307929342625 +Recall:0.309604513071417 +Precision:0.43046524955715343 +soda_c:0.08056479007503722 +para_Bleu_1:0.3975304274857351 +para_Bleu_2:0.24253918136446623 +para_Bleu_3:0.14848895422464012 +para_Bleu_4:0.09337330751749118 +para_METEOR:0.16677196164785574 +para_ROUGE_L:0.33750187221117683 +para_CIDEr:0.31278894258081524 + +overall score of iter 22661: 0.5729342117461622 + +Save model at iter 22661 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 22743 (epoch 17), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.714), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.714), ('loss_caption_0', 1.773), ('loss_caption', 1.775), ('total_loss', 9.803)]), +time/iter = 0.714, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22876 (epoch 17), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.774), ('loss_ce_0', 0.249), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.76), ('loss_caption', 1.759), ('total_loss', 9.631)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23009 (epoch 17), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.105), ('loss_bbox', 0.012), ('loss_giou', 0.171), ('loss_self_iou', 0.003), ('cardinality_error', 7.872), ('loss_ce_0', 0.237), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.872), ('loss_caption_0', 1.69), ('loss_caption', 1.688), ('total_loss', 9.229)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23142 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.098), ('loss_bbox', 0.013), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.744), ('loss_ce_0', 0.239), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.66), ('loss_caption', 1.663), ('total_loss', 9.173)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23275 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 7.82), ('loss_ce_0', 0.242), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.727), ('loss_caption', 1.741), ('total_loss', 9.535)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23408 (epoch 17), +loss = OrderedDict([('loss_ce', 0.235), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.173), ('loss_self_iou', 0.004), ('cardinality_error', 7.083), ('loss_ce_0', 0.235), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.083), ('loss_caption_0', 1.678), ('loss_caption', 1.68), ('total_loss', 9.181)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23541 (epoch 17), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.185), ('loss_self_iou', 0.003), ('cardinality_error', 7.782), ('loss_ce_0', 0.253), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.686), ('loss_caption', 1.674), ('total_loss', 9.361)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23674 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), 
('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.242), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.734), ('loss_caption', 1.755), ('total_loss', 9.502)]), +time/iter = 0.169, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23807 (epoch 17), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 8.023), ('loss_ce_0', 0.248), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.838), ('loss_caption', 1.842), ('total_loss', 10.01)]), +time/iter = 0.176, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23940 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.246), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.661), ('loss_caption', 1.655), ('total_loss', 9.188)]), +time/iter = 0.168, bad_vid = 0.000 + +Validation results of iter 23994: +Bleu_1:0.19099469488969467 +Bleu_2:0.11646897839764006 +Bleu_3:0.06451308365995856 +Bleu_4:0.032200079484133 +METEOR:0.08912416771202449 +ROUGE_L:0.1730757893125124 +CIDEr:0.5693051160396969 +Recall:0.3097042977992106 +Precision:0.43274547601681085 +soda_c:0.08084297498321232 +para_Bleu_1:0.3924031546442418 +para_Bleu_2:0.23911474626028398 +para_Bleu_3:0.14600811918196227 +para_Bleu_4:0.09107950853175292 +para_METEOR:0.16594454181978452 +para_ROUGE_L:0.33729101832099057 +para_CIDEr:0.30892642009784 + +overall score of iter 23994: 0.5659504704493774 + +Save model at iter 23994 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 24073 (epoch 18), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.11), ('loss_bbox', 0.012), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.97), ('loss_ce_0', 0.246), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.689), ('loss_caption', 1.683), ('total_loss', 9.309)]), +time/iter = 0.720, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24206 (epoch 18), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.118), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 8.286), ('loss_ce_0', 0.236), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.286), ('loss_caption_0', 1.712), ('loss_caption', 1.715), ('total_loss', 9.432)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24339 (epoch 18), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.098), ('loss_bbox', 0.012), ('loss_giou', 0.167), ('loss_self_iou', 0.003), ('cardinality_error', 7.316), ('loss_ce_0', 0.247), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.316), ('loss_caption_0', 1.695), ('loss_caption', 1.701), ('total_loss', 9.257)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24472 (epoch 18), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.003), ('cardinality_error', 7.459), ('loss_ce_0', 0.248), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.459), ('loss_caption_0', 1.699), ('loss_caption', 1.699), ('total_loss', 9.337)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24605 (epoch 18), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.18), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.243), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.775), ('loss_caption', 1.773), ('total_loss', 9.644)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24738 (epoch 18), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.101), ('loss_bbox', 0.016), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.556), ('loss_ce_0', 0.246), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.556), ('loss_caption_0', 1.727), ('loss_caption', 1.73), ('total_loss', 9.525)]), +time/iter = 0.166, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24871 (epoch 18), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.241), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.77), ('loss_caption', 1.773), ('total_loss', 9.641)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25004 (epoch 18), +loss = OrderedDict([('loss_ce', 0.246), 
('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 8.143), ('loss_ce_0', 0.247), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.143), ('loss_caption_0', 1.692), ('loss_caption', 1.684), ('total_loss', 9.379)]), +time/iter = 0.151, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25137 (epoch 18), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.179), ('loss_self_iou', 0.004), ('cardinality_error', 7.88), ('loss_ce_0', 0.245), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.88), ('loss_caption_0', 1.691), ('loss_caption', 1.696), ('total_loss', 9.347)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25270 (epoch 18), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.185), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.238), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.687), ('loss_caption', 1.694), ('total_loss', 9.34)]), +time/iter = 0.146, bad_vid = 0.000 + +Validation results of iter 25327: +Bleu_1:0.19191750615066444 +Bleu_2:0.11783589874301872 +Bleu_3:0.06597231596326529 +Bleu_4:0.03167603834812624 +METEOR:0.08996609888818348 +ROUGE_L:0.1746391859525846 +CIDEr:0.5689023016363987 +Recall:0.31503357525649683 +Precision:0.4376628112951966 +soda_c:0.08097707611185051 +para_Bleu_1:0.3977375551078834 +para_Bleu_2:0.24323062675170298 +para_Bleu_3:0.1488548587270082 +para_Bleu_4:0.09292110149283073 +para_METEOR:0.16716298804356167 +para_ROUGE_L:0.33781551083855066 +para_CIDEr:0.31014493696748857 + +overall score of iter 25327: 0.570229026503881 + +Save model at iter 25327 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 25403 (epoch 19), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.005), ('cardinality_error', 7.429), ('loss_ce_0', 0.248), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.429), ('loss_caption_0', 1.705), ('loss_caption', 1.695), ('total_loss', 9.343)]), +time/iter = 0.723, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25536 (epoch 19), +loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.189), ('loss_self_iou', 0.003), ('cardinality_error', 7.887), ('loss_ce_0', 0.246), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.887), ('loss_caption_0', 1.717), ('loss_caption', 1.729), ('total_loss', 9.517)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25669 (epoch 19), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.707), ('loss_ce_0', 0.243), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.718), ('loss_caption', 1.711), ('total_loss', 9.385)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25802 (epoch 19), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 8.173), ('loss_ce_0', 0.242), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.173), ('loss_caption_0', 1.732), ('loss_caption', 1.735), ('total_loss', 9.515)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25935 (epoch 19), +loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.241), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.626), ('loss_caption', 1.628), ('total_loss', 9.063)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26068 (epoch 19), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.182), ('loss_self_iou', 0.005), ('cardinality_error', 7.444), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.444), ('loss_caption_0', 1.697), ('loss_caption', 1.701), ('total_loss', 9.35)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26201 (epoch 19), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.097), ('loss_bbox', 0.014), ('loss_giou', 0.168), ('loss_self_iou', 0.005), ('cardinality_error', 7.301), ('loss_ce_0', 0.237), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.301), ('loss_caption_0', 1.702), ('loss_caption', 1.703), ('total_loss', 9.254)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26334 (epoch 19), +loss = OrderedDict([('loss_ce', 0.238), 
('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.174), ('loss_self_iou', 0.003), ('cardinality_error', 7.827), ('loss_ce_0', 0.242), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.729), ('loss_caption', 1.725), ('total_loss', 9.424)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26467 (epoch 19), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 8.023), ('loss_ce_0', 0.245), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.751), ('loss_caption', 1.746), ('total_loss', 9.586)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26600 (epoch 19), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.902), ('loss_ce_0', 0.242), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.902), ('loss_caption_0', 1.727), ('loss_caption', 1.737), ('total_loss', 9.533)]), +time/iter = 0.156, bad_vid = 0.000 + +Validation results of iter 26660: +Bleu_1:0.1908811984292725 +Bleu_2:0.11664270449592412 +Bleu_3:0.06546844271584715 +Bleu_4:0.03266470081303028 +METEOR:0.08981101020496235 +ROUGE_L:0.17382953846907112 +CIDEr:0.5716745559959934 +Recall:0.31292035599338697 +Precision:0.4345220728699943 +soda_c:0.08127095018359767 +para_Bleu_1:0.40170065588267356 +para_Bleu_2:0.2447870245859959 +para_Bleu_3:0.14990588787772124 +para_Bleu_4:0.09419227635900729 +para_METEOR:0.16780671784283924 +para_ROUGE_L:0.33845945539662686 +para_CIDEr:0.3198675630646056 + +overall score of iter 26660: 0.5818665572664521 + +Save model at iter 26660 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +Best epoch: 11 + +Best Model Performance: +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 +avg_proposal_number:-1 + +Best Overall Score epoch11: 1.5265537286258848 + diff --git a/yc2_univl/val.log b/yc2_univl/val.log new file mode 100644 index 0000000000000000000000000000000000000000..76f83c2963d4b440439af55ee7506b115beba8c3 --- /dev/null +++ b/yc2_univl/val.log @@ -0,0 +1,21 @@ +Best Model Performance: +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 +avg_proposal_number:-1 + +Best Overall Score epoch11: 1.5265537286258848