diff --git a/.gitattributes b/.gitattributes
index ecbd6733767fe5cd6acda87d23875bdce16b0fa0..6c1045b3720c5e6aa3f8a3ae41e82a5d81326097 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -38,3 +38,13 @@ backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pd
 backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
 backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
 backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
+anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
+yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..63d60c15c02ba592a06fc67e09c654d568891054
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9569dff52f023f43117ca926bbde3e1f14003fdd
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0b28f2bcdbfef92df0153ebf03faaa2bc73158a1
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c9314eb31f87ff6f0f44cbcf948b2c4224a9eafa
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fa91240b87d388c80f808d8a78858fc60e197ed5
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..201c1ff4ff577f8ff9d247b699a0118d13adb728
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6a34d7ea66b7574d48f980820ae8fd055632c014
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8f7cc5a1f6a8314a0fb47ec38587b39870114639
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..423e5ab89a334fa4ddb0234345c578eee20851cd
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4a5acd993bcf0a0f439319aef297c4eaf9ec2b15
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..70fa415758e8d88826ce9466a1533dbf91cbcf95
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1bba6ba8d60d7c73b4a8f81a30d3bfafbcc6c1bf
--- /dev/null
+++ b/anet_clip/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 30
+width_ratio: 2 # scale for the width of the network
+iteration: 3
+width_th: 2
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..864c3a0fc0ada3b8ae6d5c81edc5d12586d3123e
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8a34730f54add9830465c52e42fbfc9536b95a29
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..de7481364454b43fb87a1655b09d949110b25c5c
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..34eecf5b883e5b9c6f750e1e747313b6202c5291
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..002f85af4093414f60b6e37e5edc14b204758ac1
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b3f476c95ea36a4bf987b132a41393c5d09ef19c
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b505369a59c4e6956fc3222dea1d31be4a831ff8
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8a678cdbd0c0195b00d3315750ec658810a0bfaa
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6c339820bdc37d7f054932b6d74615188021197d
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c19e8416b340d0c95e10fd3390f640a07f7184f5
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d7cf973f734fd70cea269c6a60dc0093e29bbc04
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b8d051a83ac473fc18d14c10b8226dc414381d9c
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c4d48bc7e63e6428984d0bc2129742f2f7dbc262
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7555c91df9a6110009920a7b5ac22c155cc59cfe
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..820f13ab195e62fe83b6a5c8d8086ef3ffb62b28
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 30
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..62551ec728f7b1283c495996b80d72abe2302686
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3a2cda8b9fa5b1093cd4327dcc407bff408a00e6
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 40
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e270b46619490eec7d96e25950138a4e96238d6a
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..65ca7f9d880f365ced5096533819011ad152b1be
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c9d59a3f0a2f985360f987d74142d74c3ad8ce9b
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..87364862dc54a01ff7835edf337d948ef7aff565
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..73c09505c83eae12bc26b6b16f8e4239aa5914d8
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ff51f66d82ad89fac2bd3d3340d30d2d2d5c1885
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a9391a3c8b96f98d601212fe5fee01d56fd73b2f
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..756953c9ca6e0fc91efe05cd28cd8d01f18c1700
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fdc8083bf18fbabbcdc25a93b1f095f6a276a544
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5c6bbfc78122b62b4c6f8bb4abae8adc706e30ad
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c8527b0a6296d1a970dca3691f3d31ea3dfa281d
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0d9d74565b11b912a681a1871360ed0bd2385ff9
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..57f86183de530604294a363c8387c1c8b49e93af
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..de9accc5c444a46a8639dc63b3d2841b6b1fdb28
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 15
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a600901cb9f10422e2ce532cb8c77ac03dc57959
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..29e6ac298128de00d5eafc256f3cefc35eb26585
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 20
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml
new file mode 100644
index 0000000000000000000000000000000000000000..dae41f38476b77921cdb2f030d11a1f32076622b
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml
@@ -0,0 +1,14 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml
+
+
+refine_pseudo_box: 0
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165
--- /dev/null
+++ b/anet_clip/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,19 @@
+id: ''
+base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+top_frames: 25
+width_ratio: 1
+iteration: 3
+width_th: 1
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3741fe96fbe15b96ea12feca4e9fa98e58b4b141
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ee94d104a9e0878da2aa2e588adeb888ff12355
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bc15339112fa0ff01c5615b311bebee685e3c089
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..38f4cd642822a36efd860f20971030093b467b26
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c08068e50a1b52db2346ed7d91f994822ecb308a
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e3d37e0af57206d3a8ace41cd93cac2d92a99aad
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..39fac1ab1f5ba0ef8be9166ab400b0303dab3c55
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 2
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2342e4a5a58d9938a847f0bf11ea87de5900dadf
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 1
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d90465e8281858d7557440ae000a2d8030b5f1be
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ca08a4041f2660b9eef0f6db53a672d88bfaa52e
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 3
+merge_k_boxes: 3
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..18d258a9eff34e7bbbcdebcd0462250746352d21
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 4
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
new file mode 100644
index 0000000000000000000000000000000000000000..739efd5c2d9c526fac569d14c81206c02a677755
--- /dev/null
+++ b/anet_clip/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml
@@ -0,0 +1,20 @@
+id: ''
+base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
+
+
+top_frames: 15
+width_ratio: 1 # scale for the width of the network
+iteration: 3
+width_th: 1
+
+pseudo_box_aug_num: 8
+pseudo_box_aug_ratio: 0.02
+pseudo_box_aug_mode: random_range
+refine_pseudo_box: 1
+refine_pseudo_stage_num: 2
+merge_k_boxes: 5
+pseudo_box_type: similarity_op_order_v2
+use_query_box_for_refine: 0
+gt_proposal_sample_num: 20
+mil_loss_coef: 0
+merge_criterion: ins_cap_topk
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_CLIP_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_CLIP_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..dbc433e0a1e0d5b37361a96e3970c0d720639db4
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_CLIP_pdvc.yml
@@ -0,0 +1,17 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['CLIP']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_UniVL_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d4e2a056537258e7c06849d22d4c26c7b25e223f
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_UniVL_pdvc.yml
@@ -0,0 +1,17 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['UniVL']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..72849bd96ab440b568774e6ee8a57f6ed6788162
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml
@@ -0,0 +1,27 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['CLIP']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+use_pseudo_box: 1
+pseudo_box_aug: 0
+pseudo_box_aug_num: 5
+pseudo_box_aug_ratio: 0.1
+pseudo_box_type: similarity
+use_anchor: 1
+pretrained_language_model: CLIP
+disable_contrastive_projection: 1
+
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fbcd022d8623d2fa4e95c31b1f8f6adef8076c1f
--- /dev/null
+++ b/anet_clip/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml
@@ -0,0 +1,27 @@
+id: base # the results and logs will be saved in this folder ./save/id
+base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml
+
+visual_feature_type: ['UniVL']
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
+feature_dim: 768
+hidden_dim: 512
+
+use_pseudo_box: 1
+pseudo_box_aug: 0
+pseudo_box_aug_num: 5
+pseudo_box_aug_ratio: 0.1
+pseudo_box_type: similarity
+use_anchor: 1
+pretrained_language_model: UniVL
+disable_contrastive_projection: 1
+
+
+caption_decoder_type: standard
+cap_nheads: 1
+cap_dec_n_points: 4
+cap_num_feature_levels: 4
+soft_attention: 1
+att_hid_size: 512
+
+ec_alpha: 1.0
\ No newline at end of file
diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa5891c2d59ff2e88a6ccbca706f7ca15f539976 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d3fff7669479f43563fb8f80dc98091b835cd49 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d00afb6f6a5f3d979ee0b513299460ed59528d71 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + 
+use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b56d4945517c6bd61351fcbef05b48c9b7448d25 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ee0f7cd39031aac12a49fa3febbad874ee84eb3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ee646dbe8a44dc5bd827e2ff354f954306512e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcc52404af176f94a0d3cecc3fcff26900f73e07 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c22d10c1f33975e94f1777fc6529b90feb81ba71 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..5a4ee8540f6cbd301e407c8fd4795518a729b2a7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e5cc7747b0917df6ff67aa7af2dcc961853d3643 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8ec8379c8cfae0384ed5c5a23cdc8ba28b250d8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 
+cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7174b3bc97912d16e2db4f3948daf1d5ccca79d2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..047111607ce477a768f570d0c1bdbe1809fd2b27 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ec22f2c3149a6bc2a839c8b7a1e7f95f799a740b --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcd7b440c4ff76e8b2cfa3324524c6a0fb7f8b3e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ceeeab0f8eddaef985f7f96f30092bf3be1c477 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml new file 
mode 100644 index 0000000000000000000000000000000000000000..b9514fabfbb681385913ea0ab7edaedaaa62b628 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..521dc511180da8f9b94569e4ab0a45844266d973 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c59ddd576b3753f4db9a137e8d3cb80dd233e0a4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbe60a5936fe763099137039a222cd0563ebaea9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9960dcaa94683c69f1c63bd6fadef7b17315ebea --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e57a1150b7f8eb19b75d32729945101e3f63970a --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a639c02411b28e7b61485c8306201526a98bb30 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09300cdacf1b21dae6e6d983cc53c0c3674f146c --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d933f4e367996fcd355585308151316fde844160 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9289c03643143a572b64987cf8b733118531b7a2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ecd4bd64bff3f0c9ec4c9a532146ccd657edc907 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 
+refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d228dc7e9de40c84d91a90b4e9e3accd41af0d7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6fe87609b78a2e77ad203ca2136882ba13568493 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d21448627979c6757d3299faee2ea5ab4d2d1b09 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc8c5a5dc77dae477e0f880e515993a46357a8c8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..665f42194576bed58d657628d916710efa51b514 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 
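(A note on the *_refine.yml variants above: each differs from its non-refine twin only by flipping pseudo_box_aug and refine_pseudo_box to 1. The augmentation code is not in this diff; the sketch below is an assumption about what pseudo_box_aug_num and pseudo_box_aug_ratio plausibly control in random_range mode, and augment_pseudo_box is a hypothetical helper, not the repo's API.)

# Hypothetical sketch: uniform jitter of (center, width) temporal pseudo boxes.
import random

def augment_pseudo_box(center, width, aug_num=5, aug_ratio=0.3):
    """Return aug_num randomly jittered copies of one temporal pseudo box."""
    boxes = []
    for _ in range(aug_num):
        c = center + random.uniform(-aug_ratio, aug_ratio) * width  # shift center
        w = width * (1.0 + random.uniform(-aug_ratio, aug_ratio))   # rescale width
        boxes.append((c, max(w, 1e-4)))  # keep widths strictly positive
    return boxes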
diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc8f3897e2e398bf6cedf9c02936553098f2c73 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ac2b451a19de13876e1d0dd042878289fdaa195 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..935b0ececa15dcf4658c1e10a5ae52b93079b0fc --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc.yml @@ -0,0 +1,11 @@ +id: anet_c3d_pdvc # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..e0db6b87acea5ffa66e35e868e44194a04c39852 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: anet_c3d_pdvc_gt +base_cfg_path:
cfgs_base/anet/anet_c3d_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..828311fc71fcc95e9b1a08506d11bb6ab602b665 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl.yml @@ -0,0 +1,53 @@ +id: anet_c3d_pdvcl + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..02b38b6f2dbbb53b838d9bfbab8cf268a7c02c62 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml @@ -0,0 +1,55 @@ +id: anet_c3d_pdvcl_gt + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.00001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 
+transformer_input_type: gt_proposals \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_c3d_props.yml b/anet_clip/backup/cfgs_base/anet/anet_c3d_props.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d2aa20fce1241e60ad77a69980acf1e3b653ef1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_c3d_props.yml @@ -0,0 +1,51 @@ +id: anet_c3d_props +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +train_proposal_sample_num: 15 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: none +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 0 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..147d726179a848dabb0367b22575fa2f20de4097 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc +base_cfg_path: cfgs_base/anet_c3d_pdvc.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d/', 'data/anet/features/i3d/', 'data/anet/features/vggish/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a6991e551815ec0ac234c30ab3a6d09f1bd75cf --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc_gt +base_cfg_path: cfgs_base/anet_c3d_pdvc_gt.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/vggish_npy/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..f30c2ab5a626538f4dbc2c1a1bc497196ff46f24 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d3b50035fe6a95bb2a5790f8b3611be54fc0fa7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..270ae993e3f72bc2d9091b4809e8715fc6c86dae --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7340a306c59e2ac685e8c59ac2960fde366e9c7b --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..16f3082ecb947291ccb4f2226312fcf3fa06d349 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c167a9b0499503b9eff84d0c1ea1aa42453cf117 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 
+top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c48adf788535e24daf8e7ffe16f2e60009118f1f --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..65d5217788315daae5e6bbf002eb746b010e2bde --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9dc19b7dcbc2f364b03abc0014d17eb6375b4a99 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042d3f885aff7b261f66fa03dad252aedbf2fcf9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f6e44731cf2c826b91b8173148e120b64d04f66 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e0f151d036988969e902c436085fa850bb50a4d --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e44527dea994822b026b814df9b354aff082b53 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..08f52e1ac3b1ebca5e2d62c639fd5f6b5752ddf9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bd202e1359b3d756d903c93acf07e4dad268323e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3fe14dc05390932f58a3f5ce8b3ffa3828296200 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..211b0adc17f02c1b64ce3ceff2c0122c7581eada --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..674edd157c6ffe26bd7e9248faffc1a68a997d35 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..20243b5ac8be187c91a7e54eb86cc27db6f21559 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cb675d9549b34cf0ee2258d3e6a107273d1e4ffd --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..586821983af378162d355b298b2788e0c651e0e6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..42476cbf72bb3d8b304e89192a30bda1606046aa --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0a8e97b44c541a99afc965764187cf264bd4268 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9e0ce1a0db8dd453b811f6f0d5609f8ae7648a6d --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8725e365b44b967e794cdc16423a571c71e33bd --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2fd6450a47d8392d35b67b997ec6173f35b6ee4b --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5cd314fb9fd20fa05b1b5417604b40d115ee008e --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..23e3dae52ca63a879083d7eafc8c1ab7e1556d71 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4a108d38d2c512864344770d5943e439eb151d5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml new file mode 
100644 index 0000000000000000000000000000000000000000..c30ad37380e6b9ef2b845061471ab2a4ff293d91 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b140662d119689dca4e409f85da91e882323bc0f --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5e03028433a6bbc1c25ae936d791096e8e7826b2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + 
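# NOTE (annotation, not a line of the original diff): across the config pairs in
# this backup, each `*_refine.yml` appears to differ from its non-refine twin only
# by flipping `pseudo_box_aug: 1` and `refine_pseudo_box: 1`; the similarity-based
# variants additionally gain the `statistic_mode` / `width_ratio` / `window_size` /
# `top_frames` block. The `weight_sim` refine file continuing below follows the
# first, minimal pattern.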
+caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ad1160b06416b51bd4e728eef5e6225f023796c0 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..f56ac030b287f6c0a806833b546d90a9d8fe9670 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..79f9caa36975efda224cb605af412efda721e7dc --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml 
b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..c748cd44f1b9ea7607e4482da4af8444347d3f88 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl.yml b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..5543e4e259942b72d98f1fe16cd4311be93ef3c7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9804be364f78a4a8f26e30e0e6923558194edcd9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc.yml b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c4ef82922a7df99f37d1a626d4a89e8c9b95722 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..df92966691ed0fa33bc4b7417f6c0ade5b383869 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 diff --git a/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvcl.yml b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..c5298c707ab8887be611c86d522e855a8a5123a4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/anet/anet_tsp_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet.yml 
b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..d83ae5b6762ddc39bcdab2aedddf47a6ed8571d3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a7bf2a745aecc0b05232f717c81a97333ee55af3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_puyu_anet.json +vocab_size: 15249 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 
+transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2.yml b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..17b3bd0263edd713fc329bf1df7b539e2f160b3d --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..0f9ec30bf455a8a9d51bb867bdbc8e4d514c8006 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2_puyu.json +vocab_size: 13411 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 
+learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto_anet.yml b/anet_clip/backup/cfgs_base/howto/base_howto_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..3deec04627b419ff129a14bcf6ef5f8382bca7af --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 + + + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/base_howto_yc2.yml b/anet_clip/backup/cfgs_base/howto/base_howto_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..85343a3924a24e42054f963f220b2a3e93769070 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/base_howto_yc2.yml @@ -0,0 +1,62 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file: data/howto/vocabulary_howto_rate2.json +# vocab_size: 14432 + + + 
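# NOTE (annotation, not a line of the original diff): throughout these configs,
# each `dict_file` is paired with its own `vocab_size` (e.g.
# vocabulary_howto_rate2_yc2.json with 14538 above). When switching to one of the
# commented-out dictionaries, the matching size presumably has to be swapped in
# as well, i.e.:
#
#   dict_file: data/howto/vocabulary_howto_rate2.json
#   vocab_size: 14432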
+train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..92c6b64a0b9a276122b86cabe3ad428fa8fd6c8a --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..dfb930405a3050a89e929c9219635231b546d3cb --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..15f8524313eeff1620c67764a09bb5268d50c249 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c19e4b987a23b04a75b0eba01abfe0de360783c --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6141c44f52d807457f9cf0c759ae34f0ce6c024c --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f45381d945d502f5a7d421ac2e2d17a7abcd5d87 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + 
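+# NOTE (assumption, not shown in this diff): configs appear to inherit every
+# default from base_cfg_path and override only the keys listed here; check the
+# repo's config loader for the exact merge semantics.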
+visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..351d2d32411e886fb1dbbe52674c262b39e1ca77 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..691b7faef9301ea5d8b205f226ecdbfe7f0618c9 --- /dev/null +++ 
b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..73f5f0634fa1d4b00a2fb49f1793d72e67c16c87 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml 
b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..078379e88f4811421e58a8d7930e932ca6641e24 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a98bb3237ea032ebfca52ae34e59d88aa3592ffa --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f3192d4aa5b4456deba35f0f4e404a2b11fa7e00 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b6b1224667dae251754c76aeaccbf93a63893e54 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 
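+# NOTE (assumption from the key names): cap_dec_n_points and
+# cap_num_feature_levels look like deformable-attention sampling points and
+# feature levels for the caption decoder; verify against the model code.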
+cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5545fc09291a7589f59c8725f38a17144f43f826 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c202054206ca67c6885cf4a34bf12a2c9eb163ef --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebd15f0c193ea72d878905401027fd28a330a6e9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dd87d3485fe65777b79be423ffe881796fe879 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 
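+# NOTE: refine_pseudo_box: 0 keeps this variant unrefined; the matching
+# *_refine.yml files below set it to 1 and add statistic_mode, width_ratio,
+# window_size and top_frames for the refinement pass.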
+refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9bf87a23994a2adb512b6fb1d1a2188f70de82a6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..271b44b03d385f7ad93fdfff959e8b41486451df --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 
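+# NOTE (assumption from the key names): with pseudo_box_aug: 1, up to
+# pseudo_box_aug_num jittered copies of each pseudo box are generated, with
+# the jitter magnitude bounded by pseudo_box_aug_ratio; confirm in the code.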
+pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9779a1898513b16aa1c2aaa57f4eb6255b2f9253 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..996e5360fc812f1943959c0bc468974117a97a94 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] 
+text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..c56573da93d2d948eae7d723904a9243498ac484 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4940aa43eec6d5fcb08905bac7892f10b28c7549 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f373f5e6dfc0a586f104777611c18ad10dcddb9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..e28db709d1590f546524626df9b13676034d0489 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] 
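+# NOTE: the first entry holds HowTo100M pretraining features, the second the
+# target dataset (YouCook2); the *_val lists below keep only the target-dataset
+# folders, presumably because validation runs on the target split alone.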
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f8bf9b58b304ea4bc7becf5368d3025b192e5ea5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c56556156aa50810c468bb22a2d00ace2213860 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml @@ -0,0 +1,46 @@ 
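+# NOTE: this _v0 file is byte-identical to the preceding
+# howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml (both diffs record
+# blob 4bc67bf5e3278f9c16228a024efbf3ffa703b854); it looks like a kept snapshot.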
+id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ac92f31028eece0c85e6cc642876b3fee015063 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..456af3710b005fecd7848bf8bfe73e7de8dd58df --- 
/dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b5ab9e0bdfca3c12d8c932e52a0e0e20bf6e759a --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml 
b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..c6cdee3f0a43f9eee2898c610868aea88103ad9a --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e32f2b3092a0f11c35bf4a2ab0ab62172c23815 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml 
b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9205bfd2568d41ec26689b63fcdb82f30f1e0c7b --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,48 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + + + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ed0b046f4922fec52231c7c0d5c551cff82f0cc --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b40c179f05dff58d76a601a232e6fada42f29505 --- /dev/null +++ 
b/anet_clip/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..32e237518518df60263b914b65d18dba1d0b8f46 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1dcb3b5b547fda7ed525234c4f97976badc60c04 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6f775d16de8d0ca478c89310cf56ae9cc12b6d6e --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..04d7bda3fde5f365542bbd033cee609457c89604 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebb97f21d137b0826c96dfea0a86e234195d39be --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..47aa1ebe8a83f7375281ca97c736b47b312d7806 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 
+pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86ad0779edfff76fd33862932cd3b534902be794 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..11cbc212d5837924bc8afb7d4635427be51ee216 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 
+window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..0dcf153ed24336b039f3e288035937de41f36a94 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..424cf659ab8edb6fc4b81364ce47a2d568f65c07 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 
4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..53a57713fe15fb61233549dc5ecb309e986a5508 --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..93346fbdee8e6aaf398c9c429b91cc825377c9aa --- /dev/null +++ b/anet_clip/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml 
b/anet_clip/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..f5f9a193a786aad4960447288edf675aecb58129 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dbabf6d032f5848991215aec06c7fcef0fb711 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7a064f9627e6d2f00667c740f27f3564ce16d63 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml @@ -0,0 +1,32 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: align +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef7c8924196f230fbf7abf4bd2aada4d8c813275 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + 
+use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..355c7d66081451b3513304f8b81126fd1976f918 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5121b6cf76c3b4966c382f4cfca11c71891a721c --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a95a0e782daf8c2f799f90d93aa95a6eceb015b2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..59967b4b1bc17087eda8b17e520c3e83e153e699 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..178db2ad770c80e3d3d365842e7df6f2a24c0fee --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7555f552813b710ea274c5068b4b4b46b15e00e8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..726c043ff0300b678776e8391d1bb3ed962307cf --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..71be5f3eb1240f38e131b7600e58cf1a1b3f7b3d --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc03cac62cb816ce7b98e9fcf4ff96167d03b682 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d054ac67dc4a7765def67b554b1c520c04cd18da --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..27e89d7b820b566c825bf2be4e6164f218664588 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..39dfc692b19e2acf130606f314214410f0720990 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..fe6cba647bb5d104b990a3415706d56971426b1f --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml 
b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..05c7d7053dd25cb4a3e06340268c29a14d7dc05e --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..40fd330d45cf07a8e34c442d56c1339fa237c5e1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd4ead981978c54801c70e6dec9fc4154f6fcf40 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86b411963f0ab337fa843b01a49b743c55d0f4ba --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4b959243136e53599aa026ae7a386d9ae8ce41a --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..259b30520a7d6eb411b14497d97e2b47939933ae --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..59ad8e64a36d46297e604044ab9fa8c0fe8bf3e1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3f8a497a405c767f138ba08ec811b417d62e5674 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..0aebca8432993dd31f123e618c40a701da8ed968 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..29a1b716404048c53360c2e95eb69510f03eb1e9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..9c2142a150ea00790b8c18556d0004bfa89afae8 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + 
+use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b624be12c30a6132baf378a696b5d85949754b5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..3aedf7494857125480dd5c5dc50bd3a5cf9f4260 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml
new file mode 100644 index 0000000000000000000000000000000000000000..5104cde9760d48a7f9fa23cb1a04b825c18f3844 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8bca42c4f97b02f9f9bb5f73f37c7660a2c17f64 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f75a379f584c0e1c6937f7382106b41aa13de36b --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..70bb98540afb82ad228fd38386b705eac0186b43 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..377d0315f95c91a695c3452a623a2178f36f8b5e --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a43f4fbb5db27f501c2e18f01a4ce609982ca832 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..edb891d5da98423d456c7b80ab35cc9a50143577 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..b51463e36ec85d9216a6644801548bae016228fe --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml new file mode 100644 index 
0000000000000000000000000000000000000000..349183f42fc665a103e3b1b628b6f055bfbaee2d --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..87138b61dc4b554deebc0245686a152f593825fc --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file: 'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/voc_tasty_14.json +vocab_size: 14670 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 # capped at 20 here; 42 is the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml new file mode 100644 index 0000000000000000000000000000000000000000..1a82a7274fb3f956c8545096bdf86e3e1f9c0468 --- /dev/null +++ b/anet_clip/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file:
'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/vocabulary_tasty.json +vocab_size: 30171 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 42 # 42 is the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml b/anet_clip/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..d14b206d40c4f2399400913d1ff15b8659b575b9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/vlep/captiondata/vlep_meta.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/vlep/vlep_vocabulary_rate2_yc2.json +vocab_size: 4491 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cccf2ec29dc7c513d8b4cc90d4a6f6a3fabbc28d --- /dev/null +++ b/anet_clip/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,44 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/vlep/base_vlep-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/output/vlep_clip_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/vlep/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..00399b0c2f2a021a7476750b003026045d776cd1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml @@ -0,0 +1,20 @@ +id: yc2_UniVL_pdvc +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] + +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..ab29c8850e08c4549496f768a65f7ff4d08f33ba --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml @@ -0,0 +1,19 @@ +id: yc2_ViP_pdvc_norm +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text/'] +feature_dim: 512 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8ecdedb6ef1d7787c74040ef64b644bfb98d956 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml @@ -0,0 +1,29 @@ +id: yc2_abox_ViP_pseudo_similarity +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text'] +feature_dim: 512 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_type: similarity +use_anchor: 0 +pretrained_language_model: CLIP-ViP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c15a2a7c6e8dea1452f91f26dda6fe9fb6ebe8f7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..217693d7afa3593227e2d368fdb2552cd9371369 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..13eef191acd1177c2d4e7bdc64f2b755e80ab5e5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + 
+visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..88bd8b2fec5674da63b5171fa4c7bfeee0426fc9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4846c148573235eb6aa047cb024e6c78a4e1cba2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml 
b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4b329f14bec0848ba829c1b4048e7fe22fb46e83 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d62fd00faf076f269351f19da5083817b419ff0 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e25901b1bf82142d41e05343f65e1ba59b8d908b --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL 
+disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..77143cc57f11432ea5001da37c0014eb1696acc2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..87ff91cad79953a42bb1b582e8170779d1e147c7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..388cbc2b2527190f4ccf17c7006ec9adee33ae5e --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8b7d7cecca9a90d08a1fe516e115891645fb0b6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c49641fdda1e81d5e713e9b615d3d1186a7fea7d --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 
0000000000000000000000000000000000000000..dbb37176adcdc33825abe0ed4a943588ad157f4a --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b271a74c0f4f3d976410fcc2b607d12056124d08 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d91010f653fe7c8f52caead5e7737a8ea102fd9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL 
+disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..bed129c0b6368d63e430310d7caa0ce4a633e329 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca26e6a13b9df3b80e91cf34b8668676895d6214 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c5086a3c1ab5482fd8595d66ebd5cecd3da4502c --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ec5cdeb72ada972c4b0ec906ed3d96060a7e018 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d56829b138e152c73a8513f85f3f086c40ae838 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml 
b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042ae96b01a2c741fe1304441db5bd0ca14109b4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e9f47c5587ae99e197c9e4c2d87ccc4545923143 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..78f0529a85faecb3ca48833e213e1be64072439a --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 
+pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..615d745a91273d92d158ecbd28e9eb7e5ec77640 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f2fe9580c80c0a6c19ebd865cc9625a7f3b997ec --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a3866db1b5556c8b1c7a3f37a37686bdc2170a13 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..707926c6592afdac874f5ba9bafb7c3855e9b18e --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbc2e0503505aa158c62d8f3687cce87a45ece37 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ff93b47407b09e7af54d101c6dfcab1d359c1d3 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8bdddf2857f8baee722f798460eee584a75e07e --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..35cf1c1bee6180fdd84cf580e3266c24f80bfb2b --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim 
+use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c68cd4381b5ddc5447b4da98c968702fcfad52d2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1dcf629977378040564bf4b9256f66fe8a76282 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6768351e467f9cdcfb4cf503621faee1d28da7d5 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..48c7d7ff81d64b9b885be7e8c9c114d5b3a177c6 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a2880d6336436d6d2da1330e3835323340f5ddd7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..96d7bc4b0db3ea7647ac39f487d43fe96d1b9846 --- /dev/null 
+++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dd2d07dd9d060137eeb3f135a7e2a420d110fdf --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6043b9b8e94f7f09c976fd10fd340ce0370dbc21 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 
+att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef3bd5c6fccf6e8fa460e56fbc89d5cda0ad5a16 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ba9e35266c5ecce655d98fbda735fffd5505ab3c --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..373ef4c276a633c7923d9cdf106d81863573f378 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 
+pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e6e781eb010da1f9c67a8362776119348f972795 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c2b90ed1817ba9b5af6f71c7aa48499550fed905 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..71cdb3e5f6a305eeefcf987c447ad38ce2c0b9eb --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml @@ -0,0 
+1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..14e4add2192671c62c3c5ecab34be01db392ead2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..658a48fbf1a31ce7522f65e4f04453965f1a1130 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 
+cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8c2448ad587b99db12d4004259da8ad83c400547 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca9efe1df117e81a57d9f4c0b37dccef8ce071a4 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ae2b3a1874058268562185df28b4568088518bd --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f416921a9d59d98d643118774215085f50e4adf --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a43d8c6e42cb71191f4db57266b12c2f8ff1df7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ce8c21fb91989e7b37c1504989be65eaa5c17c1 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..25c75deb4061ea7fe6c59f5d63d26b85c332f8ce --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c1a0e79d75681650dce5753ea1934f39e05931ba --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 
+pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e6f5990069b64bd922d068c9721144ee3ed8467 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a061bab74963587e8c148e67c47ba3e8d8e7bdc9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml new file mode 100644 
index 0000000000000000000000000000000000000000..6cf3256fca24a4cdaa0c9e8c89a4fed74edd684b --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e13936ee7c32a85bb8553de1bddf8dc85f6acdd7 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..31d2af3cfa6d15ec3ce1067648dcb89cf410d309 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: 
standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1d5096ca8903c240faa1e45589e77786800bf4d9 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c04663ab87edcd38ea5a0fa00c17d99f3203fc02 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca1d85d39c264126d0cdcad3fb4ef6dcd9d78249 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e431de6395e6a745cf9c9e5f560621c1ef911015 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d4b9dec009027bfad79d96f94545c19465146271 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..e66d364a384fa467573af7703d97baf23098c9a2 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..60d558dd0b51cbe8d184681d7227c91e76246540 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc66b3cbff2550bf0264a79dd43d6b93ab7256a0 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml @@ -0,0 +1,13 @@ +id: yc2_tsn_pdvc +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9a1c528c5c792081cbb4873983306c4268a23d55 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: yc2_tsn_pdvc_gt +base_cfg_path: cfgs_base/yc2_tsn_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git 
a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml new file mode 100644 index 0000000000000000000000000000000000000000..79ef87700f600af96cb41f1953b4fb1da336c8ec --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml @@ -0,0 +1,16 @@ +id: yc2_tsn_pdvc_prior +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml new file mode 100644 index 0000000000000000000000000000000000000000..14941f50b2699cc25e74ee388bfe086ae0bda74d --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml @@ -0,0 +1,18 @@ +id: yc2_tsn_pdvc_prior_add +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +prior_manner: add + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..1420f8abf88d8bbdd6c9cf05454f0949a9fb6c44 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml @@ -0,0 +1,55 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..435e85fc3946b15c389de755987b34f8bd75d469 --- /dev/null +++ b/anet_clip/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml @@ -0,0 +1,57 @@ +id: yc2_tsn_pdvcl_gt + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' 
+val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.0001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +#max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 +transformer_input_type: gt_proposals \ No newline at end of file diff --git a/anet_clip/backup/change_config_add.py b/anet_clip/backup/change_config_add.py new file mode 100644 index 0000000000000000000000000000000000000000..610c71dbf03a1817cda08454698805982df1f985 --- /dev/null +++ b/anet_clip/backup/change_config_add.py @@ -0,0 +1,78 @@ +import os +import yaml +import argparse + +# add dryrun option +parser = argparse.ArgumentParser(description='Change config files') +parser.add_argument('--dryrun', action='store_true', help='print the planned file changes without writing anything') +args = parser.parse_args() + + + + + +# Define the folder containing YAML files +folder_path = 'cfgs_ref' +# folder_path = 'cfgs_base/anet' +# folder_path = 'cfgs' +file_filter = 'yc2' + + + +# Define the string to find and the replacement string +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video' +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj' +# find_string = 'data/yc2/captiondata/yc2' +# find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text" +find_string = "UniVL_refine" +# find_string = "pdvc_mode: 0" + +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual' +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text' +# replace_string = 'data/tasty/captiondata/tasty' +# replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml" +replace_string = "CLIP_refine" +# replace_string = "pdvc_mode: 1" + +old_name = 'univl' +new_name = 'clip' + +def replace_yaml(yaml_file_path, new_file_path, old_string, new_string): + # Read the YAML file as text + with open(yaml_file_path, 'r') as file: + yaml_text = file.read() + + # Replace a string (e.g., 'old_string') with another string (e.g., 'new_string') + + yaml_text = yaml_text.replace(old_string, new_string) + + # Save the modified text back to a YAML file + with open(new_file_path, 'w') as file: + file.write(yaml_text) + + # # Load the modified YAML data (optional) + # modified_yaml_data = yaml.safe_load(yaml_text) + +# You can now work with the modified_yaml_data as needed + +filelist = os.listdir(folder_path) +# Iterate over the files in the folder +for filename in filelist: + if file_filter not in filename: + continue + # breakpoint() + if (filename.endswith('.yaml') or filename.endswith('.yml')) and old_name in filename: + # breakpoint() + file_path = 
os.path.join(folder_path, filename) + if old_name == '': + new_filename = filename.replace('.yml', '_{}.yml'.format(new_name)) + else: + new_filename = filename.replace(old_name, new_name) + new_file_path = os.path.join(folder_path, new_filename) + + if args.dryrun: + print("Dryrun: {} -> {}".format(file_path, new_file_path)) + else: + replace_yaml(file_path, new_file_path, find_string, replace_string) + +print("String replacement completed.") \ No newline at end of file diff --git a/anet_clip/backup/demo.py b/anet_clip/backup/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e3ab4946905f140f377d120a14deff85f4622f --- /dev/null +++ b/anet_clip/backup/demo.py @@ -0,0 +1,44 @@ +import numpy as np + +# Example similarity matrix with shape [10, 200] +similarity_matrix = np.random.rand(10, 200) + +# Example range of indices for each step (stored in center and width arrays) +center = np.random.randint(0, 100, size=(10,)) +width = np.random.randint(10, 20, size=(10,)) + +# Calculate the start and end indices for each step +start_indices = np.clip(center - width // 2, 0, similarity_matrix.shape[1]) +end_indices = np.clip(center + width // 2, 0, similarity_matrix.shape[1]) + +# Generate column indices for each range +col_indices = np.arange(similarity_matrix.shape[1]) + +# Get topk values and corresponding indices +topk = 5 +topk_values = [] +topk_indices = [] + +for start, end in zip(start_indices, end_indices): + # Slice the similarity matrix within the specified range + range_values = similarity_matrix[:, start:end] + + # Find the indices of the topk values within the range + sorted_indices = np.argsort(range_values, axis=1)[:, -topk:] + sorted_indices += start # Adjust indices to the absolute position + + # Flatten and concatenate the indices; row ids must be repeated topk times so both + # index arrays have rows*topk entries (np.ravel_multi_index cannot broadcast a + # length-10 array against a length-50 one) + row_indices = np.arange(len(sorted_indices))[:, np.newaxis] + indices_flat = np.ravel_multi_index((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()), similarity_matrix.shape) + + # Append topk values and indices + topk_values.append(np.take(similarity_matrix, indices_flat)) + topk_indices.append(np.column_stack((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()))) + +# Convert lists to arrays +topk_values = np.array(topk_values) +topk_indices = np.array(topk_indices) + +print("Topk values within the specified range:", topk_values) +print("Topk indices within the specified range:", topk_indices) + diff --git a/anet_clip/backup/eval.py b/anet_clip/backup/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c59801e0e5a9e72ce22521699e53d796efd49b --- /dev/null +++ b/anet_clip/backup/eval.py @@ -0,0 +1,146 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import json +import os +import sys +import torch +import numpy as np +import time +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + +from eval_utils import evaluate +from pdvc.pdvc import build +from misc.utils import create_logger +from data.video_dataset import PropSeqDataset, collate_fn +from torch.utils.data import DataLoader +from os.path import basename +import pandas as pd + +def create_fake_test_caption_file(metadata_csv_path): + out = {} + df = pd.read_csv(metadata_csv_path) + for i, row in df.iterrows(): + 
out[basename(row['filename']).split('.')[0]] = {'duration': row['video-duration'], "timestamps": [[0, 0.5]], "sentences":["None"]} + fake_test_json = '.fake_test_json.tmp' + json.dump(out, open(fake_test_json, 'w')) + return fake_test_json + +def main(opt): + folder_path = os.path.join(opt.eval_save_dir, opt.eval_folder) + if opt.eval_mode == 'test': + if not os.path.exists(folder_path): + os.makedirs(folder_path) + logger = create_logger(folder_path, 'val.log') + if opt.eval_model_path: + model_path = opt.eval_model_path + infos_path = os.path.join('/'.join(opt.eval_model_path.split('/')[:-1]), 'info.json') + else: + model_path = os.path.join(folder_path, 'model-best.pth') + infos_path = os.path.join(folder_path, 'info.json') + + logger.info(vars(opt)) + + with open(infos_path, 'rb') as f: + logger.info('load info from {}'.format(infos_path)) + old_opt = json.load(f)['best']['opt'] + + for k, v in old_opt.items(): + if k[:4] != 'eval': + vars(opt).update({k: v}) + + opt.transformer_input_type = opt.eval_transformer_input_type + + if not torch.cuda.is_available(): + opt.nthreads = 0 + # Create the Data Loader instance + + if opt.eval_mode == 'test': + opt.eval_caption_file = create_fake_test_caption_file(opt.test_video_meta_data_csv_path) + opt.visual_feature_folder = opt.test_video_feature_folder + + val_dataset = PropSeqDataset(opt.eval_caption_file, + opt.visual_feature_folder, opt.text_feature_folder, + opt.dict_file, False, opt.eval_proposal_type, + opt) + loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn) + + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = val_dataset.translator + + + + if not os.path.exists(model_path): + raise AssertionError('File {} does not exist'.format(model_path)) + + logger.debug('Loading model from {}'.format(model_path)) + loaded_pth = torch.load(model_path, map_location=opt.eval_device) + epoch = loaded_pth['epoch'] + + # loaded_pth = transfer(model, loaded_pth, model_path+'.transfer.pth') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + + model.to(opt.eval_device) + + if opt.eval_mode == 'test': + out_json_path = os.path.join(folder_path, 'dvc_results.json') + evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=True) + + + else: + out_json_path = os.path.join(folder_path, '{}_epoch{}_num{}_alpha{}.json'.format( + time.strftime("%Y-%m-%d-%H-%M-%S_", time.localtime()) + str(opt.id), epoch, len(loader.dataset), + opt.ec_alpha)) + caption_scores, eval_loss = evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=False) + # breakpoint() + avg_eval_score = {key: np.array(value).mean() for key, value in caption_scores.items() if key != 'tiou'} + # avg_eval_score2 = {key: np.array(value).mean() * 4917 / len(loader.dataset) for key, value in caption_scores.items() if key != 'tiou'} + + # logger.info( + # '\nValidation result based on all 4917 val videos:\n {}\n avg_score:\n{}'.format( + # caption_scores.items(), + # avg_eval_score)) + + logger.info( + '\nValidation result based on {} available val videos:\n avg_score:\n{}'.format(len(loader.dataset), + avg_eval_score)) + + logger.info('saving results json to 
{}'.format(out_json_path)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--eval_save_dir', type=str, default='save') + parser.add_argument('--eval_mode', type=str, default='eval', choices=['eval', 'test']) + parser.add_argument('--test_video_feature_folder', type=str, nargs='+', default=None) + parser.add_argument('--test_video_meta_data_csv_path', type=str, default=None) + parser.add_argument('--eval_folder', type=str, required=True) + parser.add_argument('--eval_model_path', type=str, default='') + parser.add_argument('--eval_tool_version', type=str, default='2018', choices=['2018', '2021']) + parser.add_argument('--eval_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--eval_proposal_type', type=str, default='gt') + parser.add_argument('--eval_transformer_input_type', type=str, default='queries', choices=['gt_proposals', 'prior_proposals','queries']) + parser.add_argument('--gpu_id', type=str, nargs='+', default=['0']) + parser.add_argument('--eval_device', type=str, default='cuda') + parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all']) + opt = parser.parse_args() + + #breakpoint() + + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + if True: + torch.backends.cudnn.enabled = False + main(opt) diff --git a/anet_clip/backup/eval_utils.py b/anet_clip/backup/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd727ecebd0364fe9ad45d94f582fdcb17d54b --- /dev/null +++ b/anet_clip/backup/eval_utils.py @@ -0,0 +1,241 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import collections +import torch +import numpy as np +import json +from collections import OrderedDict +from tqdm import tqdm +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) + + +from densevid_eval3.eval_soda import eval_soda +from densevid_eval3.eval_para import eval_para +from densevid_eval3.eval_dvc import eval_dvc + +def calculate_avg_proposal_num(json_path): + data = json.load(open(json_path)) + return np.array([len(v) for v in data['results'].values()]).mean() + +def convert_tapjson_to_dvcjson(tap_json, dvc_json): + data = json.load(open(tap_json, 'r')) + data['version'] = "VERSION 1.0" + data['external_data'] = {'used:': True, 'details': "C3D pretrained on Sports-1M"} + + all_names = list(data['results'].keys()) + for video_name in all_names: + for p_info in data['results'][video_name]: + p_info['timestamp'] = p_info.pop('segment') + p_info['proposal_score'] = p_info.pop('score') + p_info['sentence_score'] = p_info.pop('sentence_score', 0) + data['results']["v_" + video_name] = data['results'].pop(video_name) + json.dump(data, open(dvc_json, 'w')) + + +def convert_dvcjson_to_tapjson(dvc_json, tap_json): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + event_num = len(data[video_name]) + timestamps = [data[video_name][i]['timestamp'] for i in range(event_num)] + sentences = [data[video_name][i]['sentence'] for 
i in range(event_num)] + for i, timestamp in enumerate(timestamps): + score = data[video_name][i].get('proposal_score', 1.0) + video_info.append({'segment': timestamp, 'score': score, 'sentence': sentences[i], 'sentence_score': data[video_name][i].get('sentence_score', 0)}) + out['results'][video_name[2:]] = video_info + json.dump(out, open(tap_json, 'w')) + + +def convert_gtjson_to_tapjson(gt_json, tap_json): + data = json.load(open(gt_json, 'r')) + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + timestamps = data[video_name]['timestamps'] + sentences = data[video_name]['sentences'] + for i, timestamp in enumerate(timestamps): + video_info.append({'segment': timestamp, 'score': 1., 'sentence': sentences[i]}) + out['results'][video_name[2:]] = video_info + with open(tap_json, 'w') as f: + json.dump(out, f) + + +def get_topn_from_dvcjson(dvc_json, out_json, top_n=3, ranking_key='proposal_score', score_thres=-1e8): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + all_names = list(data.keys()) + num = 0 + bad_vid = 0 + for video_name in all_names: + info = data[video_name] + new_info = sorted(info, key=lambda x: x[ranking_key], reverse=True) + new_info = [p for p in new_info if p[ranking_key] > score_thres] + new_info = new_info[:top_n] + out['results'][video_name] = new_info + num += len(new_info) + if len(new_info) == 0: + bad_vid += 1 + out['results'].pop(video_name) + print('average proposal number: {}'.format(num / len(all_names))) + print('bad videos number: {}'.format(bad_vid)) + print('good videos number: {}'.format(len(out['results']))) + with open(out_json, 'w') as f: + json.dump(out, f) + + +def eval_metrics(dvc_filename, gt_filenames, para_gt_filenames, alpha=0.3, ranking_key='proposal_score', rerank=False, dvc_eval_version='2018', transformer_input_type='queries'): + score = collections.defaultdict(lambda: -1) + # top_n = 3 + # top_n_filename = dvc_filename + '.top{}.json'.format(top_n) + # get_topn_from_dvcjson(dvc_filename, top_n_filename, top_n=top_n, ranking_key=ranking_key) + # dvc_score = eval_dvc(json_path=top_n_filename, reference=gt_filenames) + # dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + # dvc_score.update(eval_soda(top_n_filename, ref_list=gt_filenames)) + # dvc_score.update(eval_para(top_n_filename, referneces=para_gt_filenames)) + # for key in dvc_score.keys(): + # score[key] = dvc_score[key] + if transformer_input_type == 'prior_proposals': + dvc_score = eval_para(dvc_filename, referneces=para_gt_filenames) + score.update(dvc_score) + #breakpoint() + return score + + else: + if rerank: + dvc_filename = reranking(dvc_filename, alpha=alpha, temperature=2.0) + dvc_score = eval_dvc(json_path=dvc_filename, reference=gt_filenames, version=dvc_eval_version) + dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + dvc_score.update(eval_soda(dvc_filename, ref_list=gt_filenames)) + dvc_score.update(eval_para(dvc_filename, referneces=para_gt_filenames)) + score.update(dvc_score) + return score + + +def save_dvc_json(out_json, path): + with open(path, 'w') as f: + out_json['valid_video_num'] = len(out_json['results']) + out_json['avg_proposal_num'] = np.array([len(v) for v in out_json['results'].values()]).mean().item() + json.dump(out_json, f) 
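+# A back-of-the-envelope check of the joint score that reranking() below computes (illustrative numbers, not from any run): +# with alpha=0.3 and temperature=2.0, a proposal with proposal_score=0.9 and sentence_score=-6.0 over a 12-word sentence gets +#     sent = -6.0 / (12 ** 2.0 + 1e-5) ≈ -0.0417 +#     joint_score = 0.3 * (-0.0417) + 0.9 ≈ 0.8875 +# Candidates are then sorted by joint_score, truncated to the top candidate's pred_event_count, and re-sorted by timestamp.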
+ +def reranking(p_src, alpha, temperature): + print('alpha: {}, temp: {}'.format(alpha, temperature)) + d = json.load(open(p_src)) + d_items = list(d['results'].items()) + for k,v in d_items: + if True: + sent_scores = [p['sentence_score'] / (float(len(p['sentence'].split()))**(temperature) + 1e-5) for p in v] + prop_score = [p['proposal_score'] for p in v] + joint_score = alpha * (np.array(sent_scores)) + (np.array(prop_score)) + for i,p in enumerate(v): + p['joint_score'] = joint_score[i] + v = sorted(v, key=lambda x: x['joint_score'], reverse=True) + topN = v[0]['pred_event_count'] + v = v[:topN] + v = sorted(v, key=lambda x: x['timestamp']) + d['results'][k] = v + save_path = p_src+'_rerank_alpha{}_temp{}.json'.format(alpha, temperature) + save_dvc_json(d, save_path) + return save_path + + +def evaluate(model, criterion, postprocessors, loader, dvc_json_path, logger=None, args=None, score_threshold=0, + alpha=0.3, dvc_eval_version='2018', device='cuda', debug=False, skip_lang_eval=False): + out_json = {'results': {}, + 'version': "VERSION 1.0", + 'external_data': {'used:': True, 'details': None}} + opt = loader.dataset.opt + + loss_sum = OrderedDict() + with torch.set_grad_enabled(False): + for dt in tqdm(loader, disable=opt.disable_tqdm): + # valid_keys = ["video_tensor", "video_length", "video_mask", "video_key"] + # dt = {key: value for key, value in dt.items() if key in valid_keys} + dt = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt = collections.defaultdict(lambda: None, dt) + + dt['video_target'] = [ + {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # output, loss = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + output, _ = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + orig_target_sizes = dt['video_length'][:, 1] + + weight_dict = criterion.weight_dict + # Huabin comment this line (anything about 'loss') to avoid reporting losses during evaluation + # final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + + # Huabin comment this line to avoid reporting losses during evaluation + # for loss_k, loss_v in loss.items(): + # loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item() + # loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + results = postprocessors['bbox'](output, orig_target_sizes, loader) + + batch_json = {} + for idx, video_name in enumerate(dt['video_key']): + segment = results[idx]['boxes'].cpu().numpy() + raw_boxes = results[idx]['raw_boxes'].cpu().numpy() + # pdb.set_trace() + #breakpoint() + batch_json[video_name] = [ + { + "timestamp": segment[pid].tolist(), + "raw_box": raw_boxes[pid].tolist(), + "proposal_score": results[idx]['scores'][pid].item(), + "sentence": results[idx]['captions'][pid], + "sentence_score": results[idx]['caption_scores'][pid], + 'query_id': results[idx]['query_id'][pid].item(), + 'vid_duration': results[idx]['vid_duration'].item(), + 'pred_event_count': results[idx]['pred_seq_len'].item(), + } + for pid in range(len(segment)) if results[idx]['scores'][pid].item() > score_threshold] + out_json['results'].update(batch_json) + if debug and len(out_json['results']) > 5: + break + + save_dvc_json(out_json, dvc_json_path) + + if skip_lang_eval: + return None, None + + # Huabin comment this line to avoid reporting losses during evaluation + # for k in loss_sum.keys(): + # loss_sum[k] = np.round(loss_sum[k] / (len(loader) + 
1e-5), 3).item() + # logger.info('loss: {}'.format(loss_sum)) + scores = eval_metrics(dvc_json_path, + gt_filenames=opt.gt_file_for_eval, + para_gt_filenames=opt.gt_file_for_para_eval, + alpha=alpha, + rerank=(opt.count_loss_coef > 0), + dvc_eval_version=dvc_eval_version, + transformer_input_type=opt.transformer_input_type + ) + + out_json.update(scores) + save_dvc_json(out_json, dvc_json_path) + # return scores, loss_sum + return scores, [] diff --git a/anet_clip/backup/misc/MIL_loss.py b/anet_clip/backup/misc/MIL_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a234e01695ca8871b045a0ba31b13e9e79883a --- /dev/null +++ b/anet_clip/backup/misc/MIL_loss.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmdet.models.losses import accuracy +from mmdet.models.losses.cross_entropy_loss import _expand_onehot_labels +from .utils import weight_reduce_loss + + +class MILLoss(nn.Module): + + def __init__(self, + # use_binary=True, + # reduction='mean', + binary_ins=False, + loss_weight=1.0, eps=1e-6, loss_type='gfocal_loss'): + """ + Args: + use_binary (bool, optional): Whether the prediction is + used for binary cross entropy. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + """ + super(MILLoss, self).__init__() + # self.use_binary = use_binary + # self.reduction = reduction + self.loss_weight = loss_weight + # if self.use_sigmoid: + # self.loss_cls = CrossEntropyLoss(use_sigmoid=True, loss_weight=loss_weight) + self.eps = eps + self.loss_type = loss_type + self.binary_ins = binary_ins + + def gfocal_loss(self, p, q, w=1.0): + l1 = (p - q) ** 2 + l2 = q * (p + self.eps).log() + (1 - q) * (1 - p + self.eps).log() + return -(l1 * l2 * w).sum(dim=-1) + + def forward(self, bag_cls_prob, bag_ins_outs, labels, valid, weight=None): + """ + bag_cls_prob: (B, N, C), + bag_ins_outs: (B, N, C*2/C) + valid: (B, N, 1/C) + labels: (B, ) + Returns: + """ + if self.binary_ins: + assert bag_ins_outs.shape[-1] / bag_cls_prob.shape[-1] == 2 + else: + assert bag_ins_outs.shape[-1] == bag_cls_prob.shape[-1] + + B, N, C = bag_cls_prob.shape + prob_cls = bag_cls_prob.unsqueeze(dim=-1) # (B, N, C, 1) + prob_ins = bag_ins_outs.reshape(B, N, C, -1) # (B, N, C, 2/1) + prob_ins = prob_ins.softmax(dim=1) * valid.unsqueeze(dim=-1) + prob_ins = F.normalize(prob_ins, dim=1, p=1) + prob = (prob_cls * prob_ins).sum(dim=1) + acc = accuracy(prob[..., 0], labels) + + label_weights = (valid.sum(dim=1) > 0).float() + labels = _expand_onehot_labels(labels, None, C)[0].float() + num_sample = max(torch.sum(label_weights.sum(dim=-1) > 0).float().item(), 1.) 
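+        # Shape check: prob_ins has been softmax-normalized over the N proposals of each bag and masked by `valid`, +        # so prob is (B, C, 2/1): each proposal's class probability weighted by its normalized instance weight and +        # summed over N. num_sample counts bags with at least one valid proposal and is the averaging factor below.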
+ + if prob.shape[-1] == 1: + prob = prob.squeeze(dim=-1) + elif prob.shape[-1] == 2: # with binary ins + pos_prob, neg_prob = prob[..., 0], prob[..., 1] + prob = torch.cat([pos_prob, neg_prob]) + neg_labels = labels.new_zeros(labels.shape) + labels = torch.cat([labels, neg_labels]) + label_weights = torch.cat([label_weights, label_weights]) + + if self.loss_type == 'gfocal_loss': + loss = self.gfocal_loss(prob, labels, label_weights) + if weight is not None: + # modified by fei ##############################################################3 + weight=weight.squeeze(-1) + elif self.loss_type == 'binary_cross_entropy': + # if self.use_sigmoid: + # method 1: + # loss = self.loss_cls( + # prob, + # labels, + # label_weights, + # avg_factor=avg_factor, + # reduction_override=reduction_override) + # method 2 + prob = prob.clamp(0, 1) + # modified by fei ##############################################################3 + loss = F.binary_cross_entropy(prob, labels.float(), None, reduction="none") + else: + raise ValueError() + loss = weight_reduce_loss(loss, weight, avg_factor=num_sample) * self.loss_weight + return loss, acc, num_sample \ No newline at end of file diff --git a/anet_clip/backup/misc/__pycache__/utils.cpython-38.pyc b/anet_clip/backup/misc/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b3a8d5ea6440b3f900ca9cd1815a5cf81f0534c Binary files /dev/null and b/anet_clip/backup/misc/__pycache__/utils.cpython-38.pyc differ diff --git a/anet_clip/backup/misc/build_vocab.py b/anet_clip/backup/misc/build_vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..181c9ff27a7edc9d57e16cba107a87857062e24b --- /dev/null +++ b/anet_clip/backup/misc/build_vocab.py @@ -0,0 +1,66 @@ +# coding:utf-8 +import json + +# file_path_list = ["data/captiondata/train_modified.json", "data/captiondata/val_1.json", "data/captiondata/val_2.json"] +file_path_list = ["data/captiondata/yc2/yc2_train.json", "data/captiondata/yc2/yc2_val.json"] + +count_threshold = 2 # 4 for anet, 2 for youcook2 +# output_path = './data/vocabulary_activitynet.json' +output_path = './data/vocabulary_youcook2.json' + +mark = [',', ':', '!', '_', ';', '-', '.', '?', '/', '"', '\\n', '\\'] + +count_vocal = {} + +for file_path in file_path_list: + data = json.load(open(file_path)) + video_ids = data.keys() + print('video num of ' + file_path.split('/')[-1], len(video_ids)) + for video_id in video_ids: + sentences = data[video_id]["sentences"] + for sentence in sentences: + for m in mark: + if m in sentence: + sentence = sentence.replace(m, " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + + sentence = sentence.lstrip() + sentence = sentence.rstrip() + sentence = sentence.lower() + sentence = sentence.split(" ") + length = len(sentence) + + # print(sentence) + for word in sentence: + # print(type(word)) + for m in word: + if m == ' ': + print('warning !') + word = word.replace(m, '') + if word == '': + print('warning !') + pass + count_vocal[word] = count_vocal.get(word, 0) + 1 + +print("total word:", sum(count_vocal.values())) +count_vocal[''] = 1e10 +count_vocal[''] = 1e10 +vocab = [word for word, n in count_vocal.items() if n >= count_threshold] +bad_word = [word for word, n in count_vocal.items() if n < count_threshold] +bad_count = sum(count_vocal[word] for word in bad_word) + +vocab.append('UNK') +print("number of vocab:", len(vocab)) +print("number of bad word:", len(bad_word)) 
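+# bad_count tallies the total corpus frequency of the below-threshold words, i.e. roughly how many caption tokens will map to UNK.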
+print("number of unks:", bad_count) + +itow = {i + 1: w for i, w in enumerate(vocab)} +wtoi = {w: i + 1 for i, w in enumerate(vocab)} +print(len(itow)) +print(len(wtoi)) + +json.dump({'ix_to_word': itow, + 'word_to_ix': wtoi}, open(output_path, 'w')) +print("saving vocabulary file to {}".format(output_path)) \ No newline at end of file diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6e18c06bca951f4d0ae6bc5e92a08175f68343c Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4f6c9c6fb2356fb3b50ae9390f74b21203aa9a4 Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8794fcb4c80bab0af2f4c0acf2e324518d3630a Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef902352c76a36b2fe11f4a84a4b6186c48b2831 Binary files /dev/null and b/anet_clip/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc differ diff --git a/anet_clip/backup/misc/detr_utils/box_ops.py b/anet_clip/backup/misc/detr_utils/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7106ba6c48a3cc3827a4bd923b08c7c61213af --- /dev/null +++ b/anet_clip/backup/misc/detr_utils/box_ops.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Utilities for bounding box manipulation and GIoU. 
+""" +import torch +from torchvision.ops.boxes import box_area + +def box_cl_to_xy(x): + c, l = x.unbind(-1) + b = [c - 0.5 * l, c + 0.5 * l] + return torch.stack(b, dim=-1) + +def box_xy_to_cl(x): + x0, x1 = x.unbind(-1) + b = [(x0 + x1) / 2, (x1 - x0)] + return torch.stack(b, dim=-1) + +# modified from torchvision to also return the union +def box_iou(boxes1, boxes2): + area1 = boxes1[:, 1] - boxes1[:, 0] + area2 = boxes2[:, 1] - boxes2[:, 0] + lt = torch.max(boxes1[:, None, 0], boxes2[:, 0]) # [N,M,2] + rb = torch.min(boxes1[:, None, 1], boxes2[:, 1]) # [N,M,2] + inter = (rb - lt).clamp(min=0) # [N,M,2] + union = area1[:, None] + area2 - inter + iou = inter / (union + 1e-5) + return iou, union + + +def generalized_box_iou(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + + The boxes should be in [x0, y0, x1, y1] format + + Returns a [N, M] pairwise matrix, where N = len(boxes1) + and M = len(boxes2) + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 1:] >= boxes1[:, :1]).all() + assert (boxes2[:, 1:] >= boxes2[:, :1]).all() + iou, union = box_iou(boxes1, boxes2) + lt = torch.min(boxes1[:, None, 0], boxes2[:, 0]) + rb = torch.max(boxes1[:, None, 1], boxes2[:, 1]) + area = (rb - lt).clamp(min=0) # [N,M,2] + giou = iou - (area - union) / (area + 1e-5) + return giou \ No newline at end of file diff --git a/anet_clip/backup/misc/detr_utils/misc.py b/anet_clip/backup/misc/detr_utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..168603606353a959ca9cf6a39fbf2d7f9216e560 --- /dev/null +++ b/anet_clip/backup/misc/detr_utils/misc.py @@ -0,0 +1,989 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +# if float(torchvision.__version__[:3]) < 0.7: +# from torchvision.ops import _new_empty_tensor +# from torchvision.ops.misc import _output_size + + +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. 
+""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.nn as nn +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +if float(torchvision.__version__[:3]) < 0.5: + import math + # from torchvision.ops.misc import _NewEmptyTensorOp + def _check_size_scale_factor(dim, size, scale_factor): + # type: (int, Optional[List[int]], Optional[float]) -> None + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if not (scale_factor is not None and len(scale_factor) != dim): + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + def _output_size(dim, input, size, scale_factor): + # type: (int, Tensor, Optional[List[int]], Optional[float]) -> List[int] + assert dim == 2 + _check_size_scale_factor(dim, size, scale_factor) + if size is not None: + return size + # if dim is not 2 or scale_factor is iterable use _ntuple instead of concat + assert scale_factor is not None and isinstance(scale_factor, (int, float)) + scale_factors = [scale_factor, scale_factor] + # math.floor might return float in py2.7 + return [ + int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + ] +elif float(torchvision.__version__[:3]) < 0.7: + from torchvision.ops import _new_empty_tensor + from torchvision.ops.misc import _output_size + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def get_sha(): + cwd = os.path.dirname(os.path.abspath(__file__)) + + def _run(command): + return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() + sha = 'N/A' + diff = "clean" + branch = 'N/A' + try: + sha = _run(['git', 'rev-parse', 'HEAD']) + subprocess.check_output(['git', 'diff'], cwd=cwd) + diff = _run(['git', 'diff-index', 'HEAD']) + diff = "has uncommited changes" if diff else "clean" + branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) + except Exception: + pass + message = f"sha: {sha}, status: {diff}, branch: {branch}" + return message + + +def collate_fn(batch): + batch = 
list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], :img.shape[2]] = False + else: + raise ValueError('not supported') + return NestedTensor(tensor, mask) + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor], duration=None): + self.tensors = tensors + self.mask = mask + self.duration = duration + + def to(self, device, non_blocking=False): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device, non_blocking=non_blocking) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device, non_blocking=non_blocking) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def record_stream(self, *args, **kwargs): + self.tensors.record_stream(*args, **kwargs) + if self.mask is not None: + self.mask.record_stream(*args, **kwargs) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def get_local_size(): + if not is_dist_avail_and_initialized(): + return 1 + return int(os.environ['LOCAL_SIZE']) + + +def get_local_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return int(os.environ['LOCAL_RANK']) + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + args.dist_url = 'env://' + os.environ['LOCAL_SIZE'] = str(torch.cuda.device_count()) + elif 'SLURM_PROCID' in os.environ: + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = 
torch.cuda.device_count() + addr = subprocess.getoutput( + 'scontrol show hostname {} | head -n1'.format(node_list)) + os.environ['MASTER_PORT'] = os.environ.get('MASTER_PORT', '29500') + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['RANK'] = str(proc_id) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['LOCAL_SIZE'] = str(num_gpus) + args.dist_url = 'env://' + args.world_size = ntasks + args.rank = proc_id + args.gpu = proc_id % num_gpus + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +@torch.no_grad() +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + if target.numel() == 0: + return [torch.zeros([], device=output.device)] + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +# def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): +# # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# """ +# Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# This will eventually be supported natively by PyTorch, and this +# class can go away. +# """ +# if float(torchvision.__version__[:3]) < 0.7: +# if input.numel() > 0: +# return torch.nn.functional.interpolate( +# input, size, scale_factor, mode, align_corners +# ) +# +# output_shape = _output_size(2, input, size, scale_factor) +# output_shape = list(input.shape[:-2]) + list(output_shape) +# if float(torchvision.__version__[:3]) < 0.5: +# return _NewEmptyTensorOp.apply(input, output_shape) +# return _new_empty_tensor(input, output_shape) +# else: +# return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) + + +def get_total_grad_norm(parameters, norm_type=2): + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + device = parameters[0].grad.device + total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), + norm_type) + return total_norm + + +def inverse_sigmoid(x, eps=1e-5): + x = x.clamp(min=0, max=1) + x1 = x.clamp(min=eps) + x2 = (1 - x).clamp(min=eps) + return torch.log(x1/x2) + + + +# class SmoothedValue(object): +# """Track a series of values and provide access to smoothed values over a +# window or the global series average. +# """ +# +# def __init__(self, window_size=20, fmt=None): +# if fmt is None: +# fmt = "{median:.4f} ({global_avg:.4f})" +# self.deque = deque(maxlen=window_size) +# self.total = 0.0 +# self.count = 0 +# self.fmt = fmt +# +# def update(self, value, n=1): +# self.deque.append(value) +# self.count += n +# self.total += value * n +# +# def synchronize_between_processes(self): +# """ +# Warning: does not synchronize the deque! 
+# """ +# if not is_dist_avail_and_initialized(): +# return +# t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') +# dist.barrier() +# dist.all_reduce(t) +# t = t.tolist() +# self.count = int(t[0]) +# self.total = t[1] +# +# @property +# def median(self): +# d = torch.tensor(list(self.deque)) +# return d.median().item() +# +# @property +# def avg(self): +# d = torch.tensor(list(self.deque), dtype=torch.float32) +# return d.mean().item() +# +# @property +# def global_avg(self): +# return self.total / self.count +# +# @property +# def max(self): +# return max(self.deque) +# +# @property +# def value(self): +# return self.deque[-1] +# +# def __str__(self): +# return self.fmt.format( +# median=self.median, +# avg=self.avg, +# global_avg=self.global_avg, +# max=self.max, +# value=self.value) +# +# +# def all_gather(data): +# """ +# Run all_gather on arbitrary picklable data (not necessarily tensors) +# Args: +# data: any picklable object +# Returns: +# list[data]: list of data gathered from each rank +# """ +# world_size = get_world_size() +# if world_size == 1: +# return [data] +# +# # serialized to a Tensor +# buffer = pickle.dumps(data) +# storage = torch.ByteStorage.from_buffer(buffer) +# tensor = torch.ByteTensor(storage).to("cuda") +# +# # obtain Tensor size of each rank +# local_size = torch.tensor([tensor.numel()], device="cuda") +# size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] +# dist.all_gather(size_list, local_size) +# size_list = [int(size.item()) for size in size_list] +# max_size = max(size_list) +# +# # receiving Tensor from all ranks +# # we pad the tensor because torch all_gather does not support +# # gathering tensors of different shapes +# tensor_list = [] +# for _ in size_list: +# tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) +# if local_size != max_size: +# padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") +# tensor = torch.cat((tensor, padding), dim=0) +# dist.all_gather(tensor_list, tensor) +# +# data_list = [] +# for size, tensor in zip(size_list, tensor_list): +# buffer = tensor.cpu().numpy().tobytes()[:size] +# data_list.append(pickle.loads(buffer)) +# +# return data_list +# +# +# def reduce_dict(input_dict, average=True): +# """ +# Args: +# input_dict (dict): all the values will be reduced +# average (bool): whether to do average or sum +# Reduce the values in the dictionary from all processes so that all processes +# have the averaged results. Returns a dict with the same fields as +# input_dict, after reduction. 
+# """ +# world_size = get_world_size() +# if world_size < 2: +# return input_dict +# with torch.no_grad(): +# names = [] +# values = [] +# # sort the keys so that they are consistent across processes +# for k in sorted(input_dict.keys()): +# names.append(k) +# values.append(input_dict[k]) +# values = torch.stack(values, dim=0) +# dist.all_reduce(values) +# if average: +# values /= world_size +# reduced_dict = {k: v for k, v in zip(names, values)} +# return reduced_dict +# +# +# class MetricLogger(object): +# def __init__(self, delimiter="\t"): +# self.meters = defaultdict(SmoothedValue) +# self.delimiter = delimiter +# +# def update(self, **kwargs): +# for k, v in kwargs.items(): +# if isinstance(v, torch.Tensor): +# v = v.item() +# assert isinstance(v, (float, int)) +# self.meters[k].update(v) +# +# def __getattr__(self, attr): +# if attr in self.meters: +# return self.meters[attr] +# if attr in self.__dict__: +# return self.__dict__[attr] +# raise AttributeError("'{}' object has no attribute '{}'".format( +# type(self).__name__, attr)) +# +# def __str__(self): +# loss_str = [] +# for name, meter in self.meters.items(): +# loss_str.append( +# "{}: {}".format(name, str(meter)) +# ) +# return self.delimiter.join(loss_str) +# +# def synchronize_between_processes(self): +# for meter in self.meters.values(): +# meter.synchronize_between_processes() +# +# def add_meter(self, name, meter): +# self.meters[name] = meter +# +# def log_every(self, iterable, print_freq, header=None): +# i = 0 +# if not header: +# header = '' +# start_time = time.time() +# end = time.time() +# iter_time = SmoothedValue(fmt='{avg:.4f}') +# data_time = SmoothedValue(fmt='{avg:.4f}') +# space_fmt = ':' + str(len(str(len(iterable)))) + 'd' +# if torch.cuda.is_available(): +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}', +# 'max mem: {memory:.0f}' +# ]) +# else: +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}' +# ]) +# MB = 1024.0 * 1024.0 +# for obj in iterable: +# data_time.update(time.time() - end) +# yield obj +# iter_time.update(time.time() - end) +# if i % print_freq == 0 or i == len(iterable) - 1: +# eta_seconds = iter_time.global_avg * (len(iterable) - i) +# eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) +# if torch.cuda.is_available(): +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time), +# memory=torch.cuda.max_memory_allocated() / MB)) +# else: +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time))) +# i += 1 +# end = time.time() +# total_time = time.time() - start_time +# total_time_str = str(datetime.timedelta(seconds=int(total_time))) +# print('{} Total time: {} ({:.4f} s / it)'.format( +# header, total_time_str, total_time / len(iterable))) +# +# +# def get_sha(): +# cwd = os.path.dirname(os.path.abspath(__file__)) +# +# def _run(command): +# return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() +# sha = 'N/A' +# diff = "clean" +# branch = 'N/A' +# try: +# sha = _run(['git', 'rev-parse', 'HEAD']) +# subprocess.check_output(['git', 'diff'], cwd=cwd) +# diff = _run(['git', 'diff-index', 'HEAD']) +# diff = "has uncommited changes" if diff else "clean" +# branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) +# except Exception: 
+# pass +# message = f"sha: {sha}, status: {diff}, branch: {branch}" +# return message +# +# # +# # def collate_fn(batch): +# # batch = list(zip(*batch)) +# # batch[0] = nested_tensor_from_tensor_list(batch[0]) +# # return tuple(batch) +# +# +# def _max_by_axis(the_list): +# # type: (List[List[int]]) -> List[int] +# maxes = the_list[0] +# for sublist in the_list[1:]: +# for index, item in enumerate(sublist): +# maxes[index] = max(maxes[index], item) +# return maxes +# +# +# class NestedTensor(object): +# def __init__(self, tensors, mask: Optional[Tensor]): +# self.tensors = tensors +# self.mask = mask +# +# def to(self, device): +# # type: (Device) -> NestedTensor # noqa +# cast_tensor = self.tensors.to(device) +# mask = self.mask +# if mask is not None: +# assert mask is not None +# cast_mask = mask.to(device) +# else: +# cast_mask = None +# return NestedTensor(cast_tensor, cast_mask) +# +# def decompose(self): +# return self.tensors, self.mask +# +# def __repr__(self): +# return str(self.tensors) +# +# # +# # def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): +# # # TODO make this more general +# # if tensor_list[0].ndim == 3: +# # if torchvision._is_tracing(): +# # # nested_tensor_from_tensor_list() does not export well to ONNX +# # # call _onnx_nested_tensor_from_tensor_list() instead +# # return _onnx_nested_tensor_from_tensor_list(tensor_list) +# # +# # # TODO make it support different-sized images +# # max_size = _max_by_axis([list(img.shape) for img in tensor_list]) +# # # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) +# # batch_shape = [len(tensor_list)] + max_size +# # b, c, h, w = batch_shape +# # dtype = tensor_list[0].dtype +# # device = tensor_list[0].device +# # tensor = torch.zeros(batch_shape, dtype=dtype, device=device) +# # mask = torch.ones((b, h, w), dtype=torch.bool, device=device) +# # for img, pad_img, m in zip(tensor_list, tensor, mask): +# # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # m[: img.shape[1], :img.shape[2]] = False +# # else: +# # raise ValueError('not supported') +# # return NestedTensor(tensor, mask) +# +# +# # _onnx_nested_tensor_from_tensor_list() is an implementation of +# # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
+# # @torch.jit.unused +# # def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: +# # max_size = [] +# # for i in range(tensor_list[0].dim()): +# # max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) +# # max_size.append(max_size_i) +# # max_size = tuple(max_size) +# # +# # # work around for +# # # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # # m[: img.shape[1], :img.shape[2]] = False +# # # which is not yet supported in onnx +# # padded_imgs = [] +# # padded_masks = [] +# # for img in tensor_list: +# # padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] +# # padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) +# # padded_imgs.append(padded_img) +# # +# # m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) +# # padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) +# # padded_masks.append(padded_mask.to(torch.bool)) +# # +# # tensor = torch.stack(padded_imgs) +# # mask = torch.stack(padded_masks) +# # +# # return NestedTensor(tensor, mask=mask) +# +# +# def setup_for_distributed(is_master): +# """ +# This function disables printing when not in master process +# """ +# import builtins as __builtin__ +# builtin_print = __builtin__.print +# +# def print(*args, **kwargs): +# force = kwargs.pop('force', False) +# if is_master or force: +# builtin_print(*args, **kwargs) +# +# __builtin__.print = print +# +# +# def is_dist_avail_and_initialized(): +# if not dist.is_available(): +# return False +# if not dist.is_initialized(): +# return False +# return True +# +# +# def get_world_size(): +# if not is_dist_avail_and_initialized(): +# return 1 +# return dist.get_world_size() +# +# +# def get_rank(): +# if not is_dist_avail_and_initialized(): +# return 0 +# return dist.get_rank() +# +# +# def is_main_process(): +# return get_rank() == 0 +# +# +# def save_on_master(*args, **kwargs): +# if is_main_process(): +# torch.save(*args, **kwargs) +# +# +# def init_distributed_mode(args): +# if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: +# args.rank = int(os.environ["RANK"]) +# args.world_size = int(os.environ['WORLD_SIZE']) +# args.gpu = int(os.environ['LOCAL_RANK']) +# elif 'SLURM_PROCID' in os.environ: +# args.rank = int(os.environ['SLURM_PROCID']) +# args.gpu = args.rank % torch.cuda.device_count() +# else: +# print('Not using distributed mode') +# args.distributed = False +# return +# +# args.distributed = True +# +# torch.cuda.set_device(args.gpu) +# args.dist_backend = 'nccl' +# print('| distributed init (rank {}): {}'.format( +# args.rank, args.dist_url), flush=True) +# torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, +# world_size=args.world_size, rank=args.rank) +# torch.distributed.barrier() +# setup_for_distributed(args.rank == 0) +# +# +# @torch.no_grad() +# def accuracy(output, target, topk=(1,)): +# """Computes the precision@k for the specified values of k""" +# if target.numel() == 0: +# return [torch.zeros([], device=output.device)] +# maxk = max(topk) +# batch_size = target.size(0) +# +# _, pred = output.topk(maxk, 1, True, True) +# pred = pred.t() +# correct = pred.eq(target.view(1, -1).expand_as(pred)) +# +# res = [] +# for k in topk: +# correct_k = correct[:k].view(-1).float().sum(0) +# res.append(correct_k.mul_(100.0 / batch_size)) +# return res +# +# +# # def interpolate(input, size=None, scale_factor=None, mode="nearest", 
align_corners=None): +# # # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# # """ +# # Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# # This will eventually be supported natively by PyTorch, and this +# # class can go away. +# # """ +# # if float(torchvision.__version__[:3]) < 0.7: +# # if input.numel() > 0: +# # return torch.nn.functional.interpolate( +# # input, size, scale_factor, mode, align_corners +# # ) +# # +# # output_shape = _output_size(2, input, size, scale_factor) +# # output_shape = list(input.shape[:-2]) + list(output_shape) +# # return _new_empty_tensor(input, output_shape) +# # else: +# # return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) diff --git a/anet_clip/backup/misc/utils.py b/anet_clip/backup/misc/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b3e979477f4d1a97c28daed7f5592ea6a0a59716 --- /dev/null +++ b/anet_clip/backup/misc/utils.py @@ -0,0 +1,352 @@ +# coding:utf-8 +# from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import torch +import numpy as np +import glob +import shutil +import os +import colorlog +import random +import six +from six.moves import cPickle +import matplotlib as mpl + +mpl.use('Agg') +import matplotlib.pyplot as plt + + +def match_name_keywords(n, name_keywords): + out = False + for b in name_keywords: + if b in n: + out = True + break + return out + + +def decide_two_stage(transformer_input_type, dt, criterion): + if transformer_input_type == 'gt_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = dt['gt_boxes_mask'] + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = True + elif transformer_input_type == 'prior_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = None + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = False + elif transformer_input_type == 'queries': # + two_stage = False + proposals = None + proposals_mask = None + disable_iterative_refine = False + else: + raise ValueError('Wrong value of transformer_input_type, got {}'.format(transformer_input_type)) + return two_stage, disable_iterative_refine, proposals, proposals_mask + + +def pickle_load(f): + """ Load a pickle. + Parameters + ---------- + f: file-like object + """ + if six.PY3: + return cPickle.load(f, encoding='latin-1') + else: + return cPickle.load(f) + + +def pickle_dump(obj, f): + """ Dump a pickle. + Parameters + ---------- + obj: pickled object + f: file-like object + """ + if six.PY3: + return cPickle.dump(obj, f, protocol=2) + else: + return cPickle.dump(obj, f) + + +def set_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # grid_sampler_2d_backward_cuda does not have a deterministic implementation. 
try setting torch.use_deterministic_algorithms(True, warn_only=True) to surface the non-deterministic operation + # torch.use_deterministic_algorithms(True, warn_only=True) + + +def update_values(dict_from, dict_to): + for key, value in dict_from.items(): + if key not in dict_to.keys(): + raise AssertionError('key mismatching: {}'.format(key)) + if isinstance(value, dict): + update_values(dict_from[key], dict_to[key]) + elif value is not None: + dict_to[key] = dict_from[key] + + +def print_opt(opt, model, logger): + print_alert_message('All args:', logger) + for key, item in opt._get_kwargs(): + logger.info('{} = {}'.format(key, item)) + print_alert_message('Model structure:', logger) + logger.info(model) + + +def build_folder_name(opt): + # The dataset + # breakpoint() + if len(opt.visual_feature_folder) == 2: + if ('youcook2' in opt.visual_feature_folder[1]) or ('yc2' in opt.visual_feature_folder[1]): + dataset_name = 'howto-yc2_yc2' + elif ('Tasty' in opt.visual_feature_folder[1]) or ('tasty' in opt.visual_feature_folder[1]): + dataset_name = 'howto-tasty_tasty' + elif ('anet' in opt.visual_feature_folder[1]) or ('Anet' in opt.visual_feature_folder[1]): + dataset_name = 'howto-anet_anet' + # elif ('vlep' in opt.visual_feature_folder[1]) or ('Vlep' in opt.visual_feature_folder[1]): + # dataset_name = 'howto-vlep_vlep' + else: + raise ValueError('Wrong dataset name') + + if 'vlep' in opt.visual_feature_folder[0] or 'Vlep' in opt.visual_feature_folder[0]: + dataset_name = dataset_name.replace('howto', 'vlep') + else: + if ('youcook2' in opt.visual_feature_folder[0]) or ('yc2' in opt.visual_feature_folder[0]): + dataset_name = 'yc2' + elif ('Anet' in opt.visual_feature_folder[0]) or ('anet' in opt.visual_feature_folder[0]): + dataset_name = 'anet' + elif ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + dataset_name = 'tasty' + elif ('Howto' in opt.visual_feature_folder[0]) or ('howto' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'howto_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_anet' + elif ('vlep' in opt.visual_feature_folder[0]) or ('Vlep' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'vlep_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_anet' + else: + raise ValueError('Wrong dataset name') + if 'tasty_14' in opt.dict_file: + dataset_name += '_voc14' + + # The code base + if opt.use_anchor: + use_anchor = 'anc' # Means learnable anchor is used + else: + use_anchor = 'ori' # Means original anchor in pdvc is used + + # The state of using pseudo boxes + if opt.use_pseudo_box: + use_pseudo = 'pbox' + if opt.pseudo_box_type == 'similarity': + use_pseudo += '(sim)' + else: + use_pseudo += '({})'.format(opt.pseudo_box_type) + else: + use_pseudo = 'GT' + + # The visual-text model used + if opt.pretrained_language_model == 'CLIP-ViP': + text_model = 'ViP' + elif opt.pretrained_language_model == 'UniVL': + text_model = 'Uni' + else: +
text_model = opt.pretrained_language_model + + format_folder_name = '_'.join([dataset_name, use_anchor, use_pseudo, text_model]) + + + + return format_folder_name + +def build_folder(opt): + # breakpoint() + if opt.start_from: + print('Start training from id:{}'.format(opt.start_from)) + save_folder = os.path.join(opt.save_dir, opt.start_from) + assert os.path.exists(save_folder) and os.path.isdir(save_folder), 'Wrong start_from path: {}'.format(save_folder) + else: + if not os.path.exists(opt.save_dir): + os.mkdir(opt.save_dir) + format_folder_name = build_folder_name(opt) + # breakpoint() + save_foldername = '' + if opt.use_pseudo_box: + if opt.pseudo_box_type != 'align': + if opt.pseudo_box_type == 'similarity_op' or opt.pseudo_box_type == 'similarity_op_order': + save_foldername = '{}_topf{}_beta{}_iter{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.beta, opt.iteration, opt.width_ratio) + elif opt.pseudo_box_type == 'similarity_op_order_v2': + save_foldername = '{}_topf{}_iter{}_r{}_th{}'.format(opt.pseudo_box_type, opt.top_frames, opt.iteration, opt.width_ratio, opt.width_th) + else: + save_foldername = '{}_topf{}_w{}_{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.window_size, opt.statistic_mode, opt.width_ratio) + else: + save_foldername = 'align' + else: + save_foldername = 'gtbox' + + if opt.refine_pseudo_box: + save_foldername += '_refine_aug({},{})_top{}_{}stage'.format(opt.pseudo_box_aug_num, \ + opt.pseudo_box_aug_ratio, \ + opt.merge_k_boxes, \ + opt.refine_pseudo_stage_num) + if opt.pseudo_box_aug_mode == 'uniform': + save_foldername += '_uniform' + elif opt.pseudo_box_aug_mode == 'random_new': + save_foldername += '_random_new' + save_foldername += ('_' + opt.merge_criterion) + if opt.merge_mode == 'interpolate': + save_foldername += '_interpolate' + if opt.use_neg_pseudo_box: + save_foldername += '_{}neg'.format(opt.num_neg_box) + if opt.mil_loss_coef != 1.0: + save_foldername += '_mil_coef{}'.format(str(opt.mil_loss_coef)) + if opt.weighted_mil_loss: + save_foldername += '_wMIL' + if not opt.focal_mil: + save_foldername += '_noFocal' + if opt.disable_rematch: + save_foldername += '_nomatch' + if opt.use_additional_score_layer: + save_foldername += '_S-layer' + if opt.use_additional_cap_layer: + save_foldername += '_C-layer' + if 'puyu' in opt.train_caption_file[0]: + save_foldername += '_puyu' + elif 'mix' in opt.train_caption_file[0]: + save_foldername += '_mixlm' + + if opt.id != '': + save_foldername += '_{}'.format(opt.id) + # breakpoint() + # basefilename = os.path.basename(opt.cfg_path) + # basefilename = os.path.splitext(basefilename)[0] + save_folder = os.path.join(opt.save_dir, format_folder_name) + save_folder = os.path.join(save_folder, save_foldername) + if os.path.exists(save_folder): + print('Results folder "{}" already exists, renaming it...'.format(save_folder)) + i = 1 + while True: + new_save_folder = save_folder + '_{}'.format(i) + if not os.path.exists(new_save_folder): + save_folder = new_save_folder + break + i += 1 + # wait_flag = input('Warning! Path {} already exists, rename it? (Y/N) : '.format(save_folder)) + # if wait_flag in ['Y', 'y']: + # # opt.id = opt.id + '_{}'.format(time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) + # # save_folder = os.path.join(opt.save_dir, opt.id) + # # print('Rename opt.id as "{}".'.format(opt.id)) + # new_name = input('the new name to be appended :') + # save_folder = save_folder + '_' + new_name + # # elif wait_flag in ['N', 'n']: + # # wait_flag_new = input('Are you sure re-write this folder:{}?
(Y/N): '.format(save_folder)) + # # if wait_flag_new in ['Y', 'y']: + # # return save_folder + # # else: + # # raise AssertionError('Folder {} already exists'.format(save_folder)) + # else: + # raise AssertionError('Folder {} already exists'.format(save_folder)) + print('Results folder "{}" does not exist, creating folder...'.format(save_folder)) + os.makedirs(save_folder) + os.makedirs(os.path.join(save_folder, 'prediction')) + return save_folder + + +def backup_envir(save_folder): + backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc'] + backup_files = glob.glob('./*.py') + for folder in backup_folders: + shutil.copytree(folder, os.path.join(save_folder, 'backup', folder)) + for file in backup_files: + shutil.copyfile(file, os.path.join(save_folder, 'backup', file)) + + +def create_logger(folder, filename): + log_colors = { + 'DEBUG': 'blue', + 'INFO': 'white', + 'WARNING': 'green', + 'ERROR': 'red', + 'CRITICAL': 'yellow', + } + + import logging + logger = logging.getLogger('DVC') + # %(filename)s$RESET:%(lineno)d + # LOGFORMAT = "%(log_color)s%(asctime)s [%(log_color)s%(filename)s:%(lineno)d] | %(log_color)s%(message)s%(reset)s |" + LOGFORMAT = "" + LOG_LEVEL = logging.DEBUG + logging.root.setLevel(LOG_LEVEL) + stream = logging.StreamHandler() + stream.setLevel(LOG_LEVEL) + stream.setFormatter(colorlog.ColoredFormatter(LOGFORMAT, datefmt='%d %H:%M', log_colors=log_colors)) + + # print to log file + hdlr = logging.FileHandler(os.path.join(folder, filename)) + hdlr.setLevel(LOG_LEVEL) + # hdlr.setFormatter(logging.Formatter("[%(asctime)s] %(message)s")) + hdlr.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(hdlr) + logger.addHandler(stream) + return logger + + +def print_alert_message(str, logger=None): + msg = '*' * 20 + ' ' + str + ' ' + '*' * (58 - len(str)) + if logger: + logger.info('\n\n' + msg) + else: + print(msg) + + +def set_lr(optimizer, lr): + for group in optimizer.param_groups: + group['lr'] = lr + + +def clip_gradient(optimizer, grad_clip): + for group in optimizer.param_groups: + for i, param in enumerate(group['params']): + if param.grad is not None: + param.grad.data.clamp_(-grad_clip, grad_clip) + + +if __name__ == '__main__': + # import opts + # + # info = {'opt': vars(opts.parse_opts()), + # 'loss': {'tap_loss': 0, 'tap_reg_loss': 0, 'tap_conf_loss': 0, 'lm_loss': 0}} + # record_this_run_to_csv(info, 'save/results_all_runs.csv') + + logger = create_logger('./', 'mylogger.log') + logger.info('debug') + logger.info('test2') diff --git a/anet_clip/backup/opts.py b/anet_clip/backup/opts.py new file mode 100644 index 0000000000000000000000000000000000000000..e2edf8fa4918e9b960cd26d0fa561d3b1155b4ff --- /dev/null +++ b/anet_clip/backup/opts.py @@ -0,0 +1,311 @@ +import argparse +import time +import yaml +import os +import numpy as np + +def parse_opts(): + parser = argparse.ArgumentParser() + + # configure of this run + parser.add_argument('--cfg_path', type=str, required=True, help='config file') + parser.add_argument('--id', type=str, default='', help='id of this run. 
Results and logs will be saved in this folder ./save/id') + parser.add_argument('--gpu_id', type=str, nargs='+', default=[]) + parser.add_argument('--disable_tqdm', action='store_true') + parser.add_argument('--seed', type=int, default=777) + parser.add_argument('--random_seed', action='store_true', help='choose a random seed from {1,...,1000}') + parser.add_argument('--disable_cudnn', type=int, default=0, help='disabling cudnn may solve some unknown bugs') + parser.add_argument('--debug', action='store_true', help='using mini-dataset for fast debugging') + parser.add_argument('--device', default='cuda', choices=['cpu', 'cuda'], help='device to use for training / testing') + parser.add_argument('--map', action='store_true', default=False, help='map a100 data path to 3090 data path') + # parser.add_argument('--extra_id', type=str, default='', help='extra config to be listed in the folder name') + + # ***************************** INPUT DATA PATH ***************************** + parser.add_argument('--train_caption_file', type=str, + default='data/anet/captiondata/train_modified.json', help='') + parser.add_argument('--invalid_video_json', type=str, nargs='+', default=[]) + parser.add_argument('--val_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--visual_feature_folder', type=str, default='data/anet/resnet_bn') + parser.add_argument('--text_feature_folder', type=str, default=None) + parser.add_argument('--gt_file_for_auc', type=str, nargs='+', default='data/anet/captiondata/val_all.json') + parser.add_argument('--gt_file_for_eval', type=str, nargs='+', default=['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json']) + parser.add_argument('--gt_file_for_para_eval', type=str, nargs='+', default=['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json']) + parser.add_argument('--dict_file', type=str, default='data/anet/vocabulary_activitynet.json', help='') + parser.add_argument('--criteria_for_best_ckpt', type=str, default='overall', choices=['dvc', 'pc', 'overall'], help='for dense video captioning, use soda_c + METEOR as the criteria; ' 'for paragraph captioning, choose the best para_METEOR+para_CIDEr+para_BLEU4; ' 'for overall, select BLEU4 + METEOR + F1_score') + + parser.add_argument('--visual_feature_type', type=str, default='c3d', choices=['c3d', 'resnet_bn', 'resnet', 'UniVL', 'CLIP', 'CLIP-ViP']) + parser.add_argument('--feature_dim', type=int, default=500, help='dim of frame-level feature vector') + + parser.add_argument('--start_from', type=str, default='', help='id of the run with incomplete training') + parser.add_argument('--start_from_mode', type=str, choices=['best', 'last'], default="last") + parser.add_argument('--pretrain', type=str, choices=['full', 'encoder', 'decoder']) + parser.add_argument('--pretrain_path', type=str, default='', help='path of .pth') + + # ***************************** DATALOADER OPTION ***************************** + parser.add_argument('--nthreads', type=int, default=4) + parser.add_argument('--data_norm', type=int, default=0) + parser.add_argument('--data_rescale', type=int, default=1) + + parser.add_argument('--feature_sample_rate', type=int, default=1) + parser.add_argument('--train_proposal_sample_num', type=int, + default=24, + help='number of sampled proposals (or proposal sequence), a bigger value may be better') + parser.add_argument('--gt_proposal_sample_num', type=int, default=30) + parser.add_argument('--ft_gt_percent',
type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + parser.add_argument('--pre_percent', type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + + + # ***************************** Caption Decoder ***************************** + parser.add_argument('--vocab_size', type=int, default=5747) + parser.add_argument('--wordRNN_input_feats_type', type=str, default='C', choices=['C', 'E', 'C+E'], + help='C:clip-level features, E: event-level features, C+E: both') + parser.add_argument('--caption_decoder_type', type=str, default="light", + choices=['none','light', 'standard']) + parser.add_argument('--rnn_size', type=int, default=512, + help='size of the rnn in number of hidden nodes in each layer') + parser.add_argument('--num_layers', type=int, default=1, help='number of layers in the RNN') + parser.add_argument('--input_encoding_size', type=int, default=512, + help='the encoding size of each token in the vocabulary') + parser.add_argument('--att_hid_size', type=int, default=512, help='the hidden size of the attention MLP') + parser.add_argument('--drop_prob', type=float, default=0.5, help='strength of dropout in the Language Model RNN') + parser.add_argument('--max_caption_len', type=int, default=30, help='') + + # ***************************** Transformer ***************************** + parser.add_argument('--hidden_dim', type=int, default=512) + parser.add_argument('--num_queries', type=int, default=100) + parser.add_argument('--hidden_dropout_prob', type=float, default=0.5) + parser.add_argument('--layer_norm_eps', type=float, default=1e-12) + parser.add_argument('--caption_cost_type', type=str, default='loss') + parser.add_argument('--set_cost_caption', type=float, default=0) + parser.add_argument('--set_cost_class', type=float, default=1) + parser.add_argument('--set_cost_bbox', type=float, default=5) + parser.add_argument('--set_cost_giou', type=float, default=2) + parser.add_argument('--cost_alpha', type=float, default=0.25) + parser.add_argument('--cost_gamma', type=float, default=2) + + parser.add_argument('--bbox_loss_coef', default=5, type=float) + parser.add_argument('--giou_loss_coef', default=2, type=float) + parser.add_argument('--count_loss_coef', default=0, type=float) + parser.add_argument('--caption_loss_coef', default=0, type=float) + parser.add_argument('--eos_coef', default=0.1, type=float, + help="Relative classification weight of the no-object class") + parser.add_argument('--num_classes', type=int, default=1) + parser.add_argument('--dec_layers', type=int, default=6) + parser.add_argument('--enc_layers', type=int, default=6) + parser.add_argument('--transformer_ff_dim', type=int, default=2048) + parser.add_argument('--transformer_dropout_prob', type=float, default=0.1) + parser.add_argument('--frame_embedding_num', type=int, default = 100) + parser.add_argument('--sample_method', type=str, default = 'nearest', choices=['nearest', 'linear']) + parser.add_argument('--fix_xcw', type=int, default=0) + + # ***************************** Learnable anchor ***************************** + parser.add_argument('--use_anchor', default=False, action='store_true') + parser.add_argument('--random_anchor_init', default=True, action='store_false') + parser.add_argument('--prior_anchor_duration_init', default=True, action='store_false') + + # ***************************** Text-query alignment ***************************** + 
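(Aside on the matching-cost options configured above: set_cost_class, set_cost_bbox and set_cost_giou are the usual DETR-style Hungarian-matching weights. A pairwise query-to-event cost matrix is built from a classification term, an L1 term on the (center, width) segment encoding, and a temporal gIoU term, then solved with scipy's linear_sum_assignment; cost_alpha / cost_gamma parameterize the focal-style variant of the classification term. Below is a minimal 1-D sketch under those assumptions; the function names are illustrative, not the repository's matcher, and the class cost is simplified to -p.)

    import torch
    from scipy.optimize import linear_sum_assignment

    def cw_to_se(cw):
        # (center, width) -> (start, end) for 1-D temporal segments
        return torch.stack([cw[:, 0] - cw[:, 1] / 2, cw[:, 0] + cw[:, 1] / 2], dim=-1)

    def pairwise_giou_1d(a, b, eps=1e-6):
        # a: [N, 2], b: [M, 2] as (start, end); returns pairwise gIoU in [-1, 1], shape [N, M]
        s1, e1 = a[:, None, 0], a[:, None, 1]
        s2, e2 = b[None, :, 0], b[None, :, 1]
        inter = (torch.min(e1, e2) - torch.max(s1, s2)).clamp(min=0)
        union = (e1 - s1) + (e2 - s2) - inter
        enclose = torch.max(e1, e2) - torch.min(s1, s2)
        return inter / (union + eps) - (enclose - union) / (enclose + eps)

    def hungarian_match(fg_prob, pred_cw, tgt_cw, w_cls=1.0, w_l1=5.0, w_giou=2.0):
        # fg_prob: [num_queries] foreground probabilities; *_cw: [*, 2] (center, width) segments
        cost_cls = -fg_prob[:, None].expand(-1, tgt_cw.shape[0])      # simple -p class cost
        cost_l1 = torch.cdist(pred_cw, tgt_cw, p=1)                   # L1 on (center, width)
        cost_giou = -pairwise_giou_1d(cw_to_se(pred_cw), cw_to_se(tgt_cw))
        C = w_cls * cost_cls + w_l1 * cost_l1 + w_giou * cost_giou
        return linear_sum_assignment(C.detach().cpu().numpy())        # (query_idx, event_idx)

The default weights above mirror the flag defaults (set_cost_class=1, set_cost_bbox=5, set_cost_giou=2).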
parser.add_argument('--matcher_type', type=str, default='default', choices=['default', 'DTW', 'Sim']) + # === For Text encoder === + parser.add_argument('--pretrained_language_model', type=str, default='UniVL', \ + choices=['UniVL', 'CLIP', 'CLIP-ViP'], help='Pretrained Hugging Face model') + parser.add_argument('--text_hidden_dim', type=int, default=768, help='hidden dim of text encoder') + parser.add_argument('--max_text_input_len', type=int, default=32, help='') + parser.add_argument('--max_pos_num', type=int, default=500) + parser.add_argument('--huggingface_cache_dir', type=str, default='.cache') + parser.add_argument('--text_encoder_learning_strategy', type=str, default='frozen', choices=['frozen']) + + # === For generate_pseudo_bbox === + parser.add_argument('--use_pseudo_box', default=False, action='store_true') + parser.add_argument('--pseudo_box_type', type=str, default='similarity', choices=['align', 'similarity', 'similarity_op', 'similarity_op_order', 'similarity_op_order_v2', 'weight_sim', 'weight_index', 'modeframe']) + + # 1) For different ways of generating pseudo box + parser.add_argument('--top_frames', type=int, default=15) + parser.add_argument('--window_size', type=int, default=2) + parser.add_argument('--statistic_mode', type=str, default='median', choices=['mode', 'median']) + parser.add_argument('--width_ratio', type=float, default=-1) + parser.add_argument('--beta', type=float, default=1, help="weight for overlap loss") + parser.add_argument('--width_th', type=float, default=0.5, help="threshold for width") + parser.add_argument('--iteration', type=int, default=3, help="iteration for pseudo box generation") + # 2) For box refinement + parser.add_argument('--pseudo_box_aug', default=False, action='store_true') + parser.add_argument('--pseudo_box_aug_num', type=int, default=5) + parser.add_argument('--pseudo_box_aug_ratio', type=float, default=0.1) + parser.add_argument('--pseudo_box_aug_mode', default='random', choices=['random', 'uniform', 'random_new']) + parser.add_argument('--refine_pseudo_box', default=False, action='store_true') + parser.add_argument('--use_additional_score_layer', default=False, action='store_true') + parser.add_argument('--use_additional_cap_layer', default=False, action='store_true') + parser.add_argument('--merge_k_boxes', type=int, default=3) + parser.add_argument('--merge_criterion', type=str, choices=['cap_topk', 'ins_topk', 'ins_cap_topk'], default='cap_topk') + parser.add_argument('--merge_mode', type=str, choices=['weighted_sum', 'interpolate'], default='weighted_sum') + parser.add_argument('--refine_pseudo_stage_num', type=int, default=2) + parser.add_argument('--use_query_box_for_refine', default=False, action='store_true') + parser.add_argument('--norm_ins_score', default='sigmoid', choices=['sigmoid', 'softmax']) + parser.add_argument('--cap_prob_clip', default=False, action='store_true') + parser.add_argument('--use_neg_pseudo_box', default=False, action='store_true') + parser.add_argument('--num_neg_box', default=10, type=int) + parser.add_argument('--weighted_mil_loss', default=False, action='store_true') + parser.add_argument('--focal_mil', default=False, action='store_true') + parser.add_argument('--disable_rematch', default=False, action='store_true') + parser.add_argument('--start_refine_epoch', default=-1, type=int) + + + # === For DTW === + parser.add_argument('--align_keep_percentile', type=float, default=0.1) + parser.add_argument('--align_top_band_size', type=int, default=0) + parser.add_argument('--align_drop_z', type=int, default=0) + parser.add_argument('--align_one_to_many', default=False,
action='store_true') + parser.add_argument('--align_many_to_one', default=False, action='store_true') + parser.add_argument('--align_contiguous', default=False, action='store_true') + + # === For Sim matcher + parser.add_argument('--set_cost_sim', type=float, default=1.0) + + # === For contrastive === + parser.add_argument('--enable_contrastive', default=False, action='store_true', help='enable contrastive learning') + parser.add_argument('--disable_contrastive_projection', default=False, action='store_true', help='disable contrastive projection layers') + parser.add_argument('--contrastive_hidden_size', type=int, default=128, help='Contrastive hidden size') + parser.add_argument('--contrastive_loss_start_coef', type=float, default=0.1, help='Weight of contrastive loss') + parser.add_argument('--contrastive_loss_temperature', type=float, default=0.1, help='Temperature of cl temperature') + parser.add_argument('--enable_cross_video_cl', type=bool, default=True, help='Enable cross video contrastive loss') + parser.add_argument('--enable_e2t_cl', default=True, action='store_true', help=' enable event-to-text contrastive') + parser.add_argument('--enable_bg_for_cl', default=True, action='store_true', help=' add a class for background events') + parser.add_argument('--set_cost_cl', type=float, default=0.0) + parser.add_argument('--cl_schedule_val', type=float, nargs='+', default=[0, 0.1]) + parser.add_argument('--cl_schedule_time', type=int, nargs='+', default=[0, 2]) + + + + # ***************************** Prior ***************************** + parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all']) + + # ***************************** OPTIMIZER ***************************** + parser.add_argument('--training_scheme', type=str, default='all', choices=['cap_head_only', 'no_cap_head', 'all']) + parser.add_argument('--epoch', type=int, default=25) + parser.add_argument('--batch_size', type=int, default=1, help='batch_size') + parser.add_argument('--batch_size_for_eval', type=int, default=1, help='') + parser.add_argument('--grad_clip', type=float, default=100., help='clip gradients at this value') + parser.add_argument('--optimizer_type', type=str, default='adam') + parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay') + + parser.add_argument('--lr', type=float, default=1e-4, help='1e-4 for resnet feature and 5e-5 for C3D feature') + parser.add_argument('--learning_rate_decay_start', type=float, default=8) + parser.add_argument('--learning_rate_decay_every', type=float, default=3) + parser.add_argument('--learning_rate_decay_rate', type=float, default=0.5) + + # ***************************** SAVING AND LOGGING ***************************** + parser.add_argument('--min_epoch_when_save', type=int, default=-1) + parser.add_argument('--save_checkpoint_every', type=int, default=1) + parser.add_argument('--save_all_checkpoint', action='store_true') + parser.add_argument('--save_dir', type=str, default='/mnt/data/pjlab-3090-sport/wuhao/logs/dibs', help='directory to store checkpointed models') + + # ***************************** For Deformable DETR ************************************* + parser.add_argument('--lr_backbone_names', default=["None"], type=str, nargs='+') + parser.add_argument('--lr_backbone', default=2e-5, type=float) + parser.add_argument('--lr_proj', default=0, type=int) + parser.add_argument('--lr_linear_proj_names', default=['reference_points', 'sampling_offsets'], type=str, nargs='+') + 
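The lr_backbone_names / lr_linear_proj_names lists just above are the usual Deformable-DETR hooks for training selected parameters at a scaled learning rate; paired with match_name_keywords from misc/utils.py earlier in this diff, they drive optimizer param-group construction. A hedged sketch of that pattern (the grouping in the actual training script may differ; model and args are assumed to be in scope):

    import torch
    from misc.utils import match_name_keywords  # defined earlier in this diff

    def build_param_groups(model, args):
        proj, rest = [], []
        for n, p in model.named_parameters():
            if not p.requires_grad:
                continue
            # e.g. 'reference_points' / 'sampling_offsets' go to the scaled-LR group
            (proj if match_name_keywords(n, args.lr_linear_proj_names) else rest).append(p)
        return [
            {'params': rest, 'lr': args.lr},
            {'params': proj, 'lr': args.lr * args.lr_linear_proj_mult},
        ]

    optimizer = torch.optim.AdamW(build_param_groups(model, args), weight_decay=args.weight_decay)

The reduced rate (lr_linear_proj_mult defaults to 0.1 below) reflects how sensitive the deformable-attention sampling offsets and reference points are to large updates.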
parser.add_argument('--lr_linear_proj_mult', default=0.1, type=float) + + # Variants of Deformable DETR + parser.add_argument('--with_box_refine', default=False, action='store_true') + parser.add_argument('--transformer_input_type', default='queries', choices=['gt_proposals', 'prior_proposals', 'learnt_proposals', 'queries']) + + # * Backbone + parser.add_argument('--backbone', default=None, type=str, + help="Name of the convolutional backbone to use") + parser.add_argument('--dilation', action='store_true', + help="If true, we replace stride with dilation in the last convolutional block (DC5)") + parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'), + help="Type of positional embedding to use on top of the image features") + parser.add_argument('--position_embedding_scale', default=2 * np.pi, type=float, + help="position / size * scale") + parser.add_argument('--num_feature_levels', default=4, type=int, help='number of feature levels') + + # * Transformer + + parser.add_argument('--nheads', default=8, type=int, + help="Number of attention heads inside the transformer's attentions") + parser.add_argument('--dec_n_points', default=4, type=int) + parser.add_argument('--enc_n_points', default=4, type=int) + + parser.add_argument('--share_caption_head', type = int ,default=1) + + parser.add_argument('--cap_nheads', default=8, type=int) + parser.add_argument('--cap_dec_n_points', default=4, type=int) + parser.add_argument('--cap_num_feature_levels', default=4, type=int) + parser.add_argument('--disable_mid_caption_heads', action='store_true') + + # Loss + parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false', + help="Disables auxiliary decoding losses (loss at each layer)") + + + # * Loss coefficients + + parser.add_argument('--cls_loss_coef', default=2, type=float) + parser.add_argument('--self_iou_loss_coef', default=0.0, type=float) + parser.add_argument('--ref_rank_loss_coef', default=0.1, type=float) + parser.add_argument('--mil_loss_coef', default=1.0, type=float) + parser.add_argument('--focal_alpha', default=0.25, type=float) + parser.add_argument('--focal_gamma', default=2., type=float) + + + #***************************** Event counter ***************************** + parser.add_argument('--max_eseq_length', default=10, type=int) + parser.add_argument('--lloss_gau_mask', default=1, type=int) + parser.add_argument('--lloss_beta', default=1, type=float) + + # scheduled sampling + parser.add_argument('--scheduled_sampling_start', type=int, default=-1, + help='at what iteration to start decay gt probability') + parser.add_argument('--basic_ss_prob', type=float, default=0, help='initial ss prob') + parser.add_argument('--scheduled_sampling_increase_every', type=int, default=2, + help='every how many iterations thereafter to gt probability') + parser.add_argument('--scheduled_sampling_increase_prob', type=float, default=0.05, + help='How much to update the prob') + parser.add_argument('--scheduled_sampling_max_prob', type=float, default=0.25, + help='Maximum scheduled sampling prob.') + + # reranking + parser.add_argument('--ec_alpha', type=float, default=0.3) + args = parser.parse_args() + + if args.cfg_path: + import_cfg(args.cfg_path, vars(args)) + + if args.random_seed: + import random + seed = int(random.random() * 1000) + new_id = args.id + '_seed{}'.format(seed) + save_folder = os.path.join(args.save_dir, new_id) + while os.path.exists(save_folder): + seed = int(random.random() * 1000) + new_id = args.id + 
'_seed{}'.format(seed) + save_folder = os.path.join(args.save_dir, new_id) + args.id = new_id + args.seed = seed + + if args.debug: + args.id = 'debug_' + time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()) + args.save_checkpoint_every = 1 + args.shuffle = 0 + + if args.caption_decoder_type == 'none': + assert args.caption_loss_coef == 0 + assert args.set_cost_caption == 0 + + print("args.id: {}".format(args.id)) + return args + +def import_cfg(cfg_path, args): + with open(cfg_path, 'r') as handle: + yml = yaml.load(handle, Loader=yaml.FullLoader) + if 'base_cfg_path' in yml: + base_cfg_path = yml['base_cfg_path'] + import_cfg(base_cfg_path, args) + args.update(yml) + pass +if __name__ == '__main__': + opt = parse_opts() + print(opt) \ No newline at end of file diff --git a/anet_clip/backup/pdvc/CaptioningHead/LSTM.py b/anet_clip/backup/pdvc/CaptioningHead/LSTM.py new file mode 100644 index 0000000000000000000000000000000000000000..4b44fae2e15520e0c09c298d233e686c9b45d36e --- /dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/LSTM.py @@ -0,0 +1,174 @@ +# This file contains ShowAttendTell and AllImg model + +# ShowAttendTell is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention +# https://arxiv.org/abs/1502.03044 + +# AllImg is a model where +# img feature is concatenated with word embedding at every time step as the input of lstm +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb + +import numpy +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import * + +class Captioner(nn.Module): + def __init__(self, opt): + super(Captioner, self).__init__() + self.opt = opt + + self.vocab_size = opt.vocab_size + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.max_caption_len = opt.max_caption_len + + self.ss_prob = 0.0 # Schedule sampling probability + self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size) + + self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1) + self.dropout = nn.Dropout(self.drop_prob_lm) + + self.init_weights() + + def init_weights(self): + initrange = 0.1 + self.embed.weight.data.uniform_(-initrange, initrange) + self.logit.bias.data.fill_(0) + self.logit.weight.data.uniform_(-initrange, initrange) + + def init_hidden(self, batch_size): + weight = next(self.parameters()).data + return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(), + weight.new(self.num_layers, batch_size, self.rnn_size).zero_()) # (h0, c0) + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + max_len = input.shape[1] + output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output + + def forward(self, event, clip, clip_mask, seq): + batch_size = clip.shape[0] + + state = self.init_hidden(batch_size) + outputs = [] + seq = seq.long() + + for i in range(seq.size(1) - 1): + if self.training and i >= 1 and self.ss_prob > 0.0: # otherwiste no need to sample + sample_prob = clip.data.new(batch_size).uniform_(0, 1) + sample_mask = sample_prob < self.ss_prob + if sample_mask.sum() == 0: + it = seq[:, i].clone() + else: + sample_ind = sample_mask.nonzero().view(-1) + it = seq[:, i].data.clone() + prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1) + it.index_copy_(0, 
sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind)) + it = Variable(it, requires_grad=False) + else: + it = seq[:, i].clone() + # break if all the sequences end + if i >= 1 and seq[:, i].data.sum() == 0: + break + + output, state = self.get_logprobs_state(it, event, clip, clip_mask, state) + outputs.append(output) + + return torch.cat([_.unsqueeze(1) for _ in outputs], 1) + + + def get_logprobs_state(self, it, event , clip, clip_mask, state): + xt = self.embed(it) + output, state = self.core(xt, event , clip, clip_mask, state) + logprobs = F.log_softmax(self.logit(self.dropout(output)), dim=1) + return logprobs, state + + def sample(self, event , clip, clip_mask, opt={}): + + sample_max = opt.get('sample_max', 1) + beam_size = opt.get('beam_size', 1) + temperature = opt.get('temperature', 1.0) + + batch_size = clip.shape[0] + + state = self.init_hidden(batch_size) + + seq = [] + seqLogprobs = [] + + for t in range(self.max_caption_len + 1): + if t == 0: # input + it = clip.data.new(batch_size).long().zero_() + elif sample_max: + sampleLogprobs, it = torch.max(logprobs.data, 1) + it = it.view(-1).long() + else: + if temperature == 1.0: + prob_prev = torch.exp(logprobs.data) # fetch prev distribution: shape Nx(M+1) + else: + # scale logprobs by temperature + prob_prev = torch.exp(torch.div(logprobs.data, temperature)) + it = torch.multinomial(prob_prev, 1) + sampleLogprobs = logprobs.gather(1, it) # gather the logprobs at sampled positions + it = it.view(-1).long() # and flatten indices for downstream processing + + logprobs, state = self.get_logprobs_state(it, event , clip, clip_mask, state) + + if t >= 1: + # stop when all finished + if t == 1: + unfinished = it > 0 + else: + unfinished = unfinished & (it > 0) + if unfinished.sum() == 0: + break + it = it * unfinished.type_as(it) + seq.append(it) #seq[t] the input of t+2 time step + seqLogprobs.append(sampleLogprobs.view(-1)) + + if seq==[] or len(seq)==0: + return [],[] + return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1) + +class AllImgCore(nn.Module): + def __init__(self, opt): + super(AllImgCore, self).__init__() + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.att_feat_size = opt.clip_context_dim + + self.opt = opt + self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type + self.input_dim = self.decide_input_feats_dim() + self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim, + self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm) + assert self.wordRNN_input_feats_type == 'C' + + def decide_input_feats_dim(self): + dim = 0 + if 'E' in self.wordRNN_input_feats_type: + dim += self.opt.event_context_dim + if 'C' in self.wordRNN_input_feats_type: + dim += self.opt.clip_context_dim + return dim + + def forward(self, xt, event, clip, clip_mask, state): + input_feats = (clip * clip_mask.unsqueeze(2)).sum(1) / (clip_mask.sum(1, keepdims=True) + 1e-5) + output, state = self.rnn(torch.cat([xt, input_feats], 1).unsqueeze(0), state) + return output.squeeze(0), state + + +class LightCaptioner(Captioner): + def __init__(self, opt): + super(LightCaptioner, self).__init__(opt) + self.core = AllImgCore(opt) diff --git a/anet_clip/backup/pdvc/CaptioningHead/LSTM_DSA.py b/anet_clip/backup/pdvc/CaptioningHead/LSTM_DSA.py new file mode 100644 index 0000000000000000000000000000000000000000..918fb0ccf89416929b4cee8c1deadd7c99d586ae --- 
/dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/LSTM_DSA.py @@ -0,0 +1,289 @@ +# This file contains ShowAttendTell and AllImg model + +# ShowAttendTell(Soft attention) is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention +# https://arxiv.org/abs/1502.03044 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import * + +from pdvc.ops.modules import MSDeformAttnCap + +class Captioner(nn.Module): + def __init__(self, opt): + super(Captioner, self).__init__() + self.opt = opt + + self.vocab_size = opt.vocab_size + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.max_caption_len = opt.max_caption_len + + self.ss_prob = 0.0 # Schedule sampling probability + self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size) + + self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1) + self.dropout = nn.Dropout(self.drop_prob_lm) + + self.init_weights() + + def init_weights(self): + initrange = 0.1 + self.embed.weight.data.uniform_(-initrange, initrange) + self.logit.bias.data.fill_(0) + self.logit.weight.data.uniform_(-initrange, initrange) + + def init_hidden(self, batch_size): + weight = next(self.parameters()).data + return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(), + weight.new(self.num_layers, batch_size, self.rnn_size).zero_()) # (h0, c0) + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + max_len = input.shape[1] + output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output + + def build_prob(self, input, target, mask): + ''' + Calculate the sentence-level predicted prob for each GT sentence of each query + input: [num_sentence, max_length, num_words_voc] + ''' + # breakpoint() + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) # [num_sentence, max_length, num_words_voc] + max_len = input.shape[1] + # output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + return output + + def forward(self,hs, reference, others, cap_tensor): + seq = cap_tensor + vid_num, query_num, _ = hs.shape + assert vid_num == 1 + + reference_points = reference + input_flatten = others['memory'] + input_spatial_shapes = others['spatial_shapes'] + input_level_start_index = others['level_start_index'] + input_padding_mask = others['mask_flatten'] + if reference_points.shape[-1] == 2: + reference_points = reference_points[:, :, None] \ + * torch.stack([others['valid_ratios']]*2, -1)[:, None] + elif reference_points.shape[-1] == 1: + reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None, :, None] + + query = hs + batch_size = query.shape[1] + state = self.init_hidden(batch_size) + outputs = [] + raw_probs = [] + seq = seq.long() + + n_levels = self.core.n_levels + if n_levels < self.core.opt.num_feature_levels: + input_spatial_shapes = input_spatial_shapes[:n_levels] + input_level_start_index = input_level_start_index[:n_levels] + total_input_len = torch.prod(input_spatial_shapes, dim=1).sum() + input_flatten = input_flatten[:, :total_input_len] + input_padding_mask 
= input_padding_mask[:, :total_input_len] + reference_points = reference_points[:, :, :n_levels] + pass + + for i in range(seq.size(1) - 1): + if self.training and i >= 1 and self.ss_prob > 0.0: # otherwiste no need to sample + sample_prob = hs.new_zeros(batch_size).uniform_(0, 1) + sample_mask = sample_prob < self.ss_prob + if sample_mask.sum() == 0: + it = seq[:, i].clone() + else: + sample_ind = sample_mask.nonzero().view(-1) + it = seq[:, i].data.clone() + prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1) + it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind)) + it = Variable(it, requires_grad=False) + else: + it = seq[:, i].clone() + # break if all the sequences end + if i >= 1 and seq[:, i].data.sum() == 0: + break + + output, state, raw_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask) + outputs.append(output) + raw_probs.append(raw_prob) + + if self.opt.refine_pseudo_box and self.training: + return torch.cat([_.unsqueeze(1) for _ in outputs], 1), torch.cat([_.unsqueeze(1) for _ in raw_probs], 1) + + return torch.cat([_.unsqueeze(1) for _ in outputs], 1) + + + def get_logprobs_state(self, it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask): + xt = self.embed(it) + output, state = self.core(xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask) + logprobs = F.log_softmax(self.logit(self.dropout(output)), dim=1) + softmax_probs = F.softmax(self.logit(self.dropout(output)), dim=1) + raw_probs = self.logit(self.dropout(output)) + # raw_probs: [max_num_word, vocab_size+1] + return logprobs, state, raw_probs + + def sample(self,hs, reference, others, opt={}): + + vid_num, query_num, _ = hs.shape + assert vid_num == 1 + batch_size = vid_num * query_num + sample_max = opt.get('sample_max', 1) + beam_size = opt.get('beam_size', 1) + temperature = opt.get('temperature', 1.0) + + reference_points = reference + input_flatten = others['memory'] + input_spatial_shapes = others['spatial_shapes'] + input_level_start_index = others['level_start_index'] + input_padding_mask = others['mask_flatten'] + if reference_points.shape[-1] == 2: + reference_points = reference_points[:, :, None] \ + * torch.stack([others['valid_ratios']]*2, -1)[:, None] + elif reference_points.shape[-1] == 1: + reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None,:, None] + query = hs + + n_levels = self.core.n_levels + if n_levels < self.core.opt.num_feature_levels: + input_spatial_shapes = input_spatial_shapes[:n_levels] + input_level_start_index = input_level_start_index[:n_levels] + total_input_len = torch.prod(input_spatial_shapes, dim=1).sum() + input_flatten = input_flatten[:, :total_input_len] + input_padding_mask = input_padding_mask[:, :total_input_len] + reference_points = reference_points[:, :, :n_levels] + pass + + state = self.init_hidden(batch_size) + + seq = [] + seqLogprobs = [] + #breakpoint() + + for t in range(self.max_caption_len + 1): + if t == 0: # input + it = hs.data.new(batch_size).long().zero_() + elif sample_max: + sampleLogprobs, it = torch.max(logprobs.data, 1) + it = it.view(-1).long() + else: + if temperature == 1.0: + prob_prev = torch.exp(logprobs.data) # fetch prev distribution: shape Nx(M+1) + else: + # scale logprobs by temperature + prob_prev = torch.exp(torch.div(logprobs.data, 
temperature)) + it = torch.multinomial(prob_prev, 1) + sampleLogprobs = logprobs.gather(1, it) # gather the logprobs at sampled positions + it = it.view(-1).long() # and flatten indices for downstream processing + + logprobs, state, softmax_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask) + + if t >= 1: + # stop when all finished + if t == 1: + unfinished = it > 0 + else: + unfinished = unfinished & (it > 0) + if unfinished.sum() == 0: + break + it = it * unfinished.type_as(it) + seq.append(it) #seq[t] the input of t+2 time step + seqLogprobs.append(sampleLogprobs.view(-1)) + + if seq==[] or len(seq)==0: + return [],[] + return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1) + + +class ShowAttendTellCore(nn.Module): + + def __init__(self, opt): + super(ShowAttendTellCore, self).__init__() + self.input_encoding_size = opt.input_encoding_size + + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + #self.fc_feat_size = opt.fc_feat_size + self.att_feat_size = int(opt.clip_context_dim / opt.cap_nheads) + self.att_hid_size = opt.att_hid_size + + self.opt = opt + self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type + self.input_dim = opt.hidden_dim * 2 + + self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim , + self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm) + self.att_drop = nn.Dropout(0.5) + + d_model = opt.hidden_dim + self.n_levels = opt.cap_num_feature_levels + self.n_heads = opt.cap_nheads + self.n_points = opt.cap_dec_n_points + + self.deformable_att = MSDeformAttnCap(d_model, self.n_levels, self.n_heads, self.n_points) + + if self.att_hid_size > 0: + self.ctx2att = nn.Linear(self.att_feat_size, self.att_hid_size) + self.h2att = nn.Linear(self.rnn_size, self.att_hid_size) + self.alpha_net = nn.Linear(self.att_hid_size, 1) + + def get_input_feats(self, event, att_clip): + input_feats = [] + if 'E' in self.wordRNN_input_feats_type: + input_feats.append(event) + if 'C' in self.wordRNN_input_feats_type: + input_feats.append(att_clip) + input_feats = torch.cat(input_feats,1) + return input_feats + + def forward(self,xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask): + + joint_query = torch.cat((state[0][-1].unsqueeze(0), query), 2) + # (N_, N_q, C) + + N_, Lq_, L_, _ = reference_points.shape + + # (N_ * M_, D_, Lq_, L_* P_) + clip = self.deformable_att(joint_query, reference_points, input_flatten, input_spatial_shapes, + input_level_start_index, input_padding_mask) + clip = clip.reshape(N_, self.n_heads, -1, Lq_, self.n_levels * self.n_points).permute(0, 3, 1, 4, 2) + clip = clip.reshape(N_ * Lq_, self.n_heads, self.n_levels * self.n_points, self.att_feat_size) + att_size = self.n_levels * self.n_points + + att = self.ctx2att(clip) # (batch * att_size) * att_hid_size + att = att.view(-1, self.n_heads, att_size, self.att_hid_size) # batch * att_size * att_hid_size + att_h = self.h2att(state[0][-1]) # batch * att_hid_size + att_h = att_h.unsqueeze(1).unsqueeze(1).expand_as(att) # batch * att_size * att_hid_size + dot = att + att_h # batch * att_size * att_hid_size + dot = torch.tanh(dot) # batch * att_size * att_hid_size + dot = dot.view(-1, self.att_hid_size) # (batch * att_size) * att_hid_size + dot = self.alpha_net(dot) # (batch * att_size) * 1 + dot = dot.view(-1, att_size) # batch * 
att_size + + weight = F.softmax(dot, dim=1) + att_feats_ = clip.reshape(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size + att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size + att_res = att_res.reshape(N_ * Lq_, self.n_heads, self.att_feat_size).flatten(1) + input_feats = torch.cat((att_res.unsqueeze(0), query), 2) + # print(xt.shape, input_feats.shape, query.shape, reference_points.shape) + output, state = self.rnn(torch.cat([xt.unsqueeze(0), input_feats], 2), state) + + return output.squeeze(0), state + + +class LSTMDSACaptioner(Captioner): + def __init__(self, opt): + super(LSTMDSACaptioner, self).__init__(opt) + self.core = ShowAttendTellCore(opt) + diff --git a/anet_clip/backup/pdvc/CaptioningHead/Puppet.py b/anet_clip/backup/pdvc/CaptioningHead/Puppet.py new file mode 100644 index 0000000000000000000000000000000000000000..3051b3d3de863fefc196e08740e7d6d05474adfd --- /dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/Puppet.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn + + +class PuppetCaptionModel(nn.Module): + def __init__(self, opt): + super(PuppetCaptionModel, self).__init__() + self.vocab_size = opt.vocab_size + self.opt = opt + self.puppet_layer= nn.Linear(1,1) + + def forward(self, event, clip, clip_mask, seq): + N, L = seq.shape + output = torch.zeros((N, L-1, self.vocab_size + 1), device=seq.device) + return output + + def sample(self, event, clip, clip_mask, opt={}): + N, _, C = clip.shape + output = torch.zeros((N, 3), device=clip.device) + prob = torch.zeros((N, 3), device=clip.device) + return output, prob + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + output = - (one_hot * input * mask[..., None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output \ No newline at end of file diff --git a/anet_clip/backup/pdvc/CaptioningHead/__init__.py b/anet_clip/backup/pdvc/CaptioningHead/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55abd1cc8681971b0e498d5db23771053240029f --- /dev/null +++ b/anet_clip/backup/pdvc/CaptioningHead/__init__.py @@ -0,0 +1,22 @@ +from .LSTM import LightCaptioner +from .Puppet import PuppetCaptionModel +from .LSTM_DSA import LSTMDSACaptioner + +def build_captioner(opt): + if opt.caption_decoder_type == 'none': + caption_embed = PuppetCaptionModel(opt) + + elif opt.caption_decoder_type == 'light': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LightCaptioner(opt) + + elif opt.caption_decoder_type == 'standard': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LSTMDSACaptioner(opt) + + else: + raise ValueError('caption decoder type is invalid') + return caption_embed + diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96e1403d966894f3897772ec3341693c9e1e2097 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7505e3befe8da0cfc2e2cf4ad989639a7aad658 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc differ diff --git 
a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac28b0fdbaca42bce04d24e8200908e43ca3849d Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82aceccc9d18b389c1de136320f99a9d3948bc21 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..204ecd5a71e01bd0a22222a738ac51abf7b3af9a Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86f06e3b6d2a72ca205a646c86a1e9309be235c6 Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7506f43c89c0c6345ffd3c53b53cd87d5c394cbc Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e0d650e0f33bbf2aa9248e89a8ac9ec8a76397b Binary files /dev/null and b/anet_clip/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/UniVL.py b/anet_clip/backup/pdvc/UniVL.py new file mode 100644 index 0000000000000000000000000000000000000000..c5a8bcf7f019968d8751bbbab0537295c77ebfdd --- /dev/null +++ b/anet_clip/backup/pdvc/UniVL.py @@ -0,0 +1,238 @@ + +import os +import random +import numpy as np +from pathlib import Path +from pdvc.modules.modeling import UniVL +from pdvc.modules.tokenization import BertTokenizer +from transformers import AutoTokenizer, BertForPreTraining +import torch +import argparse + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +class UniVL_args(object): + def __init__(self) -> None: + self.do_pretrain = False + self.do_train = False + self.do_eval = True + self.train_csv = 'data/youcookii_singlef_train.csv' + self.val_csv = 'data/youcookii_singlef_val.csv' + self.data_path = 'data/youcookii_caption.pickle' + self.features_path = 'data/youcookii_videos_feature.pickle' + self.num_thread_reader = 1 + self.lr = 0.0001 + self.epochs = 20 + self.batch_size = 256 + self.batch_size_val = 3500 + self.lr_decay = 0.9 + self.n_display = 100 + self.video_dim = 1024 + self.seed = 42 + self.max_words = 48 + self.max_frames = 100 + self.feature_framerate = 1 + self.margin = 0.1 + 
self.hard_negative_rate = 0.5 + self.negative_weighting = 1 + self.n_pair = 1 + self.output_dir = None + self.bert_model = "bert-base-uncased" + self.visual_model = "visual-base" + self.cross_model = "cross-base" + self.decoder_model = "decoder-base" + self.init_model = None + self.do_lower_case = True + self.warmup_proportion = 0.1 + self.gradient_accumulation_steps = 1 + self.n_gpu = 1 + self.cache_dir = "" + self.fp16 = False + self.fp16_opt_level = 'O1' + self.task_type = "retrieval" + self.datatype = "youcook" + self.world_size = 0 + self.local_rank = 0 + self.coef_lr = 0.1 + self.use_mil = False + self.sampled_use_mil = False + self.text_num_hidden_layers = 12 + self.visual_num_hidden_layers = 6 + self.cross_num_hidden_layers = 2 + self.decoder_num_hidden_layers = 3 + self.train_sim_after_cross = False + self.expand_msrvtt_sentences = False + self.batch_size = int(self.batch_size / self.gradient_accumulation_steps) + + def __repr__(self) -> str: + return str(self.__dict__) + + +def set_seed_logger(args): + # Predefine all random seeds for reproducible runs. + random.seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) # safe no-op when fewer GPUs are present + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + return args + +def load_pretrained_UniVL(return_visual_encoder=False): + + args = UniVL_args() + args = set_seed_logger(args) + device, n_gpu = 'cuda', 1 + + init_model = '/cpfs01/user/liuhuabin/PDVC/pdvc/modules/univl.pretrained.bin' + model_state_dict = torch.load(init_model, map_location='cpu') + + # Prepare model + cache_dir = os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + if return_visual_encoder: + return model.bert, model.visual, model.normalize_video + else: + return model.bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + +if __name__ == '__main__': + # Smoke test for the visual branch. load_pretrained_UniVL builds its own + # config and loads the checkpoint from a fixed path, so it only needs the + # return_visual_encoder flag. + device, n_gpu = 'cuda', 1 + model_bert, model_visual, video_normalizer = load_pretrained_UniVL(return_visual_encoder=True)
+ inputs = torch.rand(2, 215, 1024, device=device) + video_mask = torch.ones(2, 215, device=device) + inputs = video_normalizer(inputs) + visual_embed = model_visual(inputs, video_mask, output_all_encoded_layers=True)[0][-1] + + breakpoint() # drop into the debugger to inspect visual_embed \ No newline at end of file diff --git a/anet_clip/backup/pdvc/__init__.py b/anet_clip/backup/pdvc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..194ecd26a483cef3e67c0e5cd971d4f7784aac67 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a406cf3565bfcd54eddc5d19fbeae7bffd2d629 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/__init__.cpython-39.pyc b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4a911fc83c9364bfc6b98dd5d3d5a4ed14f5e3f Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/__init__.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9cafe04379877ab0c87872ae9835aa9bdf4532a4 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6961cba44a3fa93be1463250d574c8d91411714f Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/criterion.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d08274f898128d993db3370b9307fabf56c98f6 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/criterion.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f48fdb961f47546c71e60e995699a206b62a4f6a Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d45de6e0f900d019a24e0f339e62874f2038557e Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/matcher.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6efd23cdeac69c752a715a184606139f2aded19b Binary files /dev/null and
b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/matcher.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f80042e195d3ecda40db7fe17e8b2b6b8991a376 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/matcher.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/pdvc.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/pdvc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7bef286e186c9f27de1ea48197eee0fae6a7d6f Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/pdvc.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c50c9f41bc67334949478d72b69f998d849c9f37 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2b9fbde23c0b61d1377c3e8a2c9af095131c45d Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/util.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e503a7b7440cff82242de19b9d909ba99e5f803 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/util.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/util.cpython-39.pyc b/anet_clip/backup/pdvc/__pycache__/util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df84303c83b25082e579d99e0bdbc7c05bf182ef Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/util.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f2bec65e4730af469226f8efdb168b47da926ef Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff5a503c7efa463a12801a2f62599ed146e5ca93 Binary files /dev/null and b/anet_clip/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/base_encoder.py b/anet_clip/backup/pdvc/base_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb150a62dbb709589ec5271fe1b11ec16adf8f8 --- /dev/null +++ b/anet_clip/backup/pdvc/base_encoder.py @@ -0,0 +1,86 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Base Encoder to create multi-level conv features and positional embedding. +""" + +import torch +import torch.nn.functional as F +from torch import nn +from misc.detr_utils.misc import NestedTensor +from .position_encoding import PositionEmbeddingSine + + +class BaseEncoder(nn.Module): + def __init__(self, num_feature_levels, vf_dim, hidden_dim): + super(BaseEncoder, self).__init__() + self.pos_embed = PositionEmbeddingSine(hidden_dim//2, normalize=True) + self.num_feature_levels = num_feature_levels + self.hidden_dim = hidden_dim + + if num_feature_levels > 1: + input_proj_list = [] + in_channels = vf_dim + input_proj_list.append(nn.Sequential( + nn.Conv1d(in_channels, hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )) + for _ in range(num_feature_levels - 1): + input_proj_list.append(nn.Sequential( + nn.Conv1d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1), + nn.GroupNorm(32, hidden_dim), + )) + in_channels = hidden_dim + self.input_proj = nn.ModuleList(input_proj_list) + else: + # Features are 1-D sequences of shape (N, C, L), so the single-level + # projection must also be a Conv1d, matching the multi-level branch. + self.input_proj = nn.ModuleList([ + nn.Sequential( + nn.Conv1d(vf_dim, hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )]) + + for proj in self.input_proj: + nn.init.xavier_uniform_(proj[0].weight, gain=1) + nn.init.constant_(proj[0].bias, 0) + + def forward(self, vf, mask, duration): + # vf: (N, L, C), mask: (N, L), duration: (N) + assert mask is not None + vf = vf.transpose(1, 2) # (N, L, C) --> (N, C, L) + vf_nt = NestedTensor(vf, mask, duration) + pos0 = self.pos_embed(vf_nt) + + srcs = [] + masks = [] + poses = [] + + src0, mask0 = vf_nt.decompose() + srcs.append(self.input_proj[0](src0)) + masks.append(mask0) + poses.append(pos0) + + for l in range(1, self.num_feature_levels): + if l == 1: + src = self.input_proj[l](vf_nt.tensors) + else: + src = self.input_proj[l](srcs[-1]) + m = vf_nt.mask + mask = F.interpolate(m[None].float(), size=src.shape[-1:]).to(torch.bool)[0] + pos_l = self.pos_embed(NestedTensor(src, mask, duration)).to(src.dtype) + srcs.append(src) + masks.append(mask) + poses.append(pos_l) + return srcs, masks, poses + +def build_base_encoder(args): + base_encoder = BaseEncoder(args.num_feature_levels, args.feature_dim, args.hidden_dim) + return base_encoder diff --git a/anet_clip/backup/pdvc/criterion.py b/anet_clip/backup/pdvc/criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..d47eb41a6711be9904ad6c55d502572261ff73c9 --- /dev/null +++ b/anet_clip/backup/pdvc/criterion.py @@ -0,0 +1,726 @@ +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ +import copy +import torch +import torch.nn.functional as F +from torch import nn + +from misc.detr_utils import box_ops +from misc.detr_utils.misc import (accuracy, get_world_size, + is_dist_avail_and_initialized) + +class SetCriterion(nn.Module): + """ This class computes the loss for DETR.
+ The process happens in two steps: + 1) we compute hungarian assignment between ground truth boxes and the outputs of the model + 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) + """ + def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + losses: list of all the losses to be applied. See get_loss for list of available losses. + focal_alpha: alpha in Focal Loss + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.opt = opt + self.pseudo_box_aug = opt.pseudo_box_aug + self.refine_pseudo_box = opt.refine_pseudo_box + if ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + counter_class_rate =[0.0, 0.012703673018503175, 0.04915769124551229, 0.06489919911626622, 0.0740127036730185, 0.07346037006351837, 0.08064070698702017, + 0.07069870201601768, 0.07870753935376967, 0.07097486882076774, 0.06766086716376692, 0.0579950289975145, 0.05247169290251312, 0.03783485225075946, + 0.03534935100800884, 0.03203534935100801, 0.026788180060756697, 0.02236951118475559, 0.01988400994200497, 0.016570008285004142, 0.013256006628003313, + 0.00856117094725214, 0.006904170118751726, 0.005523336095001381, 0.004694835680751174, 0.0038663352665009665, 0.0027616680475006906, 0.0027616680475006906, + 0.0016570008285004142, 0.0016570008285004142, 0.0005523336095001381, 0.0008285004142502071, 0.0, 0.00027616680475006904, 0.0, 0.0, 0.00027616680475006904, + 0.0011046672190002762, 0.0, 0.0005523336095001381, 0.0, 0.0, 0.0005523336095001381] + else: + counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01, + 1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02, + 1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03, + 2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04, + 4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04, + 2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05] + self.counter_class_rate = torch.tensor(counter_class_rate) + + def loss_labels(self, outputs, targets, indices, num_boxes, log=True): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + indices, many2one_indices = indices + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1], + dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device) + target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1) + + target_classes_onehot = target_classes_onehot[:,:,:-1] + loss_ce = sigmoid_focal_loss(src_logits, 
target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1] + losses = {'loss_ce': loss_ce} + pred_count = outputs['pred_count'] + max_length = pred_count.shape[1] - 1 + counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets] + counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long) + counter_target_onehot = torch.zeros_like(pred_count) + counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1) + weight = self.counter_class_rate[:max_length + 1].to(src_logits.device) + + counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight) + losses['loss_counter'] = counter_loss + + return losses + + @torch.no_grad() + def loss_cardinality(self, outputs, targets, indices, num_boxes): + """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes + This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients + """ + pred_logits = outputs['pred_logits'] + device = pred_logits.device + tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) + # Count the number of predictions that are NOT "no-object" (which is the last class) + card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) + card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) + losses = {'cardinality_error': card_err} + return losses + + def loss_boxes(self, outputs, targets, indices, num_boxes): + """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss + targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 2] + The target boxes are expected in format (center, length), normalized by the image size. 
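+ For example, an event spanning seconds 10-20 of a 100 s video is encoded as (center, length) = (0.15, 0.10).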
+ """ + indices, many2one_indices = indices + N = len(indices[-1][0]) + assert 'pred_boxes' in outputs + idx, idx2 = self._get_src_permutation_idx2(indices) + src_boxes = outputs['pred_boxes'][idx] + if self.opt.use_pseudo_box and self.training: + # print('use pseudo box') + target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0) + else: + # print('use gt box') + target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0) + loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') + + losses = {} + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou( + box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(target_boxes))) + losses['loss_giou'] = loss_giou.sum() / num_boxes + # print(src_boxes) + self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1) + sizes = [len(v[0]) for v in indices] + if sizes == [1]: + losses['loss_self_iou'] = self_iou + return losses + self_iou_split = 0 + for i, c in enumerate(self_iou.split(sizes, -1)): + cc = c.split(sizes, -2)[i] + self_iou_split += cc.sum() / (0.5 * (sizes[i]) * (sizes[i]-1)) + has_nan = False if torch.all(~torch.isnan(self_iou_split)) else True + has_inf = False if torch.all(torch.isfinite(self_iou_split)) else True + if has_nan or has_inf: + breakpoint() + losses['loss_self_iou'] = self_iou_split + + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_src_permutation_idx2(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + src_idx2 = torch.cat([src for (_, src) in indices]) + return (batch_idx, src_idx), src_idx2 + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + + + def get_jittered_box(self, box, box_jitter, box_aug_num=5, mode='random'): + # breakpoint() + box = box.unsqueeze(0) # (1,2) + if mode == 'random': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) + iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + min_keep_cnt = (box_aug_num-1) if (box_aug_num-1) < keep_idx.numel() else keep_idx.numel() + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]] + elif mode == 'random_new': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) 
+ iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + min_keep_cnt = (box_aug_num-1) if (box_aug_num-1) < keep_idx.numel() else keep_idx.numel() + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]] + elif mode == 'uniform': + ratio_c = box_jitter + ratio_d = 0.048 / 2 + scale_c = torch.tensor([-ratio_c, -ratio_c/2, -ratio_c/4, ratio_c/4, ratio_c/2, ratio_c]) + scale_d = torch.tensor([-ratio_d, -ratio_d/2, ratio_d/2, ratio_d]) + scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device) + scale_box = box + scale + scale_box = scale_box.clamp(min=0., max=1.) + iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + unkeep_idx = torch.where(iou.reshape(-1) <= 0.1)[0] + if keep_idx.numel() < (box_aug_num-1): + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:keep_idx.numel()] = scale_box[keep_idx] + random_indices = torch.randperm(unkeep_idx.size(0))[:(box_aug_num-1-keep_idx.numel())] + box_repeat[keep_idx.numel():(box_aug_num-1)] = scale_box[unkeep_idx[random_indices]] + else: + box_repeat = box.repeat(box_aug_num, 1) + random_indices = torch.randperm(keep_idx.numel())[:(box_aug_num-1)] + box_repeat[:box_aug_num-1] = scale_box[keep_idx[random_indices]] + elif mode == 'uniform_old': + # Augment using pre-defined ratios + ratio_c = box_jitter + ratio_d = box_jitter + scale_c = torch.linspace(1-ratio_c, 1+ratio_c, 4) + scale_d = torch.linspace(1-ratio_d, 1+ratio_d, 2) + scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device) # 8 augmented boxes in total (4 center scales x 2 duration scales) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.)
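+ # The IoU below is computed for reference only; this legacy branch keeps a + # random subset of the scaled variants regardless of overlap.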
+ iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + # keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + box_repeat = box.repeat(box_aug_num, 1) + random_indices = torch.randperm(scale_box.size(0))[:(box_aug_num-1)] + box_repeat[:(box_aug_num-1)] = scale_box[random_indices] + elif mode == 'random_range': + def batch_randomize_boxes(boxes, max_vary_range, num_samples=1): + # Get the centers and widths from the input boxes + centers = boxes[:, 0] + widths = boxes[:, 1] + # Generate random values for the left and right boundaries for each box + left_boundaries = centers - (widths / 2) - torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range) + right_boundaries = centers + (widths / 2) + torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range) + + # Ensure that the boundaries stay within the [0, 1] range + left_boundaries = left_boundaries.clamp(0, 1) + right_boundaries = right_boundaries.clamp(0, 1) + + # Calculate the new centers and widths + new_centers = (left_boundaries + right_boundaries) / 2 + new_widths = right_boundaries - left_boundaries + + # Ensure that the widths are non-negative and revert to the original boxes if needed + is_negative = new_widths <= 0 + new_widths = torch.where(is_negative, widths, new_widths) + new_centers = torch.where(is_negative, centers, new_centers) + + # Create and return the new boxes tensor + new_boxes = torch.stack((new_centers, new_widths), dim=2) + return new_boxes.squeeze(0) + box_repeat = batch_randomize_boxes(box, box_jitter, box_aug_num) + if torch.isnan(box_repeat).any(): + breakpoint() + elif mode == 'augment_width': # original width is 0.5 \sigma range + import random + def augment_boxes_with_scale(boxes, scale, num_augments): + augmented_boxes = [] + for _ in range(num_augments): + center, width = boxes[0] + # Generate a random scale factor with a roughly uniform distribution + random_scale = scale ** random.uniform(-1, 1) + new_width = width * random_scale + if center + new_width / 2 > 1 or center - new_width / 2 < 0: + new_width = width + augmented_boxes.append([center, new_width]) + augmented_boxes = torch.tensor(augmented_boxes, device=boxes.device) + return augmented_boxes + box_repeat = augment_boxes_with_scale(box, box_jitter, box_aug_num) + + else: + raise NotImplementedError('Unsupported box augmentation mode: {}'.format(mode)) + return box_repeat + + def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'cardinality': self.loss_cardinality, + 'boxes': self.loss_boxes, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs) + + def forward(self, outputs, targets, others=None, aug_num=None, aug_ratio=None): + """ This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + targets: list of dicts, such that len(targets) == batch_size.
+ The expected keys in each dict depend on the losses applied; see each loss's doc + """ + outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs' and k != 'enc_outputs'} + if self.training and self.pseudo_box_aug: + targets_cp = copy.deepcopy(targets) + assert self.opt.use_pseudo_box + for i in range(len(targets_cp)): + boxes_aug = [] + for j in range(len(targets_cp[i]['labels'])): + pseudo_box = targets_cp[i]['boxes_pseudo'][j] + pseudo_box_aug = self.get_jittered_box(pseudo_box, aug_ratio, aug_num, self.opt.pseudo_box_aug_mode) + boxes_aug.append(pseudo_box_aug) + targets_cp[i]['boxes_pseudo'] = torch.cat(boxes_aug, dim=0) + targets_cp[i]['labels'] = targets_cp[i]['labels'].unsqueeze(dim=1).repeat(1, aug_num).reshape(-1,) + targets[i]['box_pseudo_aug'] = torch.cat(boxes_aug, dim=0) + # Retrieve the matching between the outputs of the last layer and the targets + last_indices = self.matcher(outputs_without_aux, targets_cp) + else: + targets_cp = targets + last_indices = self.matcher(outputs_without_aux, targets) + outputs['matched_indices'] = last_indices + + num_boxes = sum(len(t["labels"]) for t in targets_cp) + num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + + # Compute all the requested losses + losses = {} + for loss in self.losses: + kwargs = {} + losses.update(self.get_loss(loss, outputs, targets_cp, last_indices, num_boxes, **kwargs)) + + # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. + if 'aux_outputs' in outputs: + aux_indices = [] + for i, aux_outputs in enumerate(outputs['aux_outputs']): + indices = self.matcher(aux_outputs, targets_cp) + aux_indices.append(indices) + for loss in self.losses: + if loss == 'masks': + # Intermediate masks losses are too costly to compute, we ignore them. + continue + kwargs = {} + if loss == 'labels': + # Logging is enabled only for the last layer + kwargs['log'] = False + l_dict = self.get_loss(loss, aux_outputs, targets_cp, indices, num_boxes, **kwargs) + l_dict = {k + f'_{i}': v for k, v in l_dict.items()} + losses.update(l_dict) + + return losses, last_indices, aux_indices + return losses, last_indices + +class AlignCriterion(nn.Module): + """ This class computes the loss via caption-to-query alignment. + The process happens in two steps: + 1) we compute DTW assignment between ground truth captions and the output object queries + 2) we supervise each pair of matched ground-truth / prediction (supervise class) + """ + def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + losses: list of all the losses to be applied. See get_loss for list of available losses.
+ focal_alpha: alpha in Focal Loss + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.opt = opt + counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01, + 1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02, + 1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03, + 2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04, + 4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04, + 2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05] + self.counter_class_rate = torch.tensor(counter_class_rate) + + def loss_labels(self, outputs, targets, indices, num_boxes, log=True): + """Classification loss (NLL) + Compute the classification loss and counter loss + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + indices, many2one_indices = indices + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1], + dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device) + target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1) + + target_classes_onehot = target_classes_onehot[:,:,:-1] + loss_ce = sigmoid_focal_loss(src_logits, target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1] + losses = {'loss_ce': loss_ce} + + pred_count = outputs['pred_count'] + max_length = pred_count.shape[1] - 1 + counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets] + counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long) + counter_target_onehot = torch.zeros_like(pred_count) + counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1) + weight = self.counter_class_rate[:max_length + 1].to(src_logits.device) + # breakpoint() + counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight) + losses['loss_counter'] = counter_loss + + return losses + + def loss_boxes(self, outputs, targets, indices, num_boxes): + # Compute temporal IOU loss among given predicted N temporal boundaries, which encourages the temporal boundaries to be more diverse and no overlap + # outputs: (bsz, num_query, 2) + # breakpoint() + # breakpoint() + indices, many2one_indices = indices + idx, idx2 = self._get_src_permutation_idx2(indices) + src_boxes = outputs['pred_boxes'][idx] # num_boxes, 2 + avg_duration = torch.mean(src_boxes[:, 1]) + center_point = src_boxes[:,0] + N = len(indices[-1][0]) + + losses = {} + + if self.opt.use_pseudo_box and self.training: + # If generate peseudo ground truth boxes from alignment, use the alignment boxes as the target boxes + target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0) + loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + loss_giou = 1 - 
torch.diag(box_ops.generalized_box_iou( + box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(target_boxes))) + losses['loss_giou'] = loss_giou.sum() / num_boxes + + if not self.opt.use_pseudo_box: + ## Sequence ordering loss + rank_margin = 0.01 + pairs = torch.combinations(torch.arange(center_point.size(0)), 2) + rank_dist = center_point[pairs[:, 0]] - center_point[pairs[:, 1]] + # Hinge on each ordered pair so that earlier queries keep earlier center + # points; the margin is applied once, inside the relu. + rank_loss = torch.relu(rank_dist + rank_margin).mean() + + losses['loss_ref_rank'] = rank_loss + + ## Self IOU loss + prior_duration = 0.06 + self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1) + sizes = [len(v[0]) for v in indices] + self_iou_split = 0 + for i, c in enumerate(self_iou.split(sizes, -1)): + cc = c.split(sizes, -2)[i] + self_iou_split += cc.sum() / (0.5 * (sizes[i]) * (sizes[i]-1)) + duration_constraint = torch.abs(prior_duration/(avg_duration + 1e-6) - 1) + self_iou_split += duration_constraint + + losses['loss_self_iou'] = self_iou_split + + return losses + + @torch.no_grad() + def loss_cardinality(self, outputs, targets, indices, num_boxes): + """ Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. + This is not really a loss; it is intended for logging purposes only and does not propagate gradients. + """ + pred_logits = outputs['pred_logits'] + device = pred_logits.device + tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) + # Count the number of predictions that are NOT "no-object" (which is the last class) + card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) + card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) + losses = {'cardinality_error': card_err} + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_src_permutation_idx2(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + src_idx2 = torch.cat([src for (_, src) in indices]) + return (batch_idx, src_idx), src_idx2 + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'boxes': self.loss_boxes, + 'cardinality': self.loss_cardinality, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs) + + def forward(self, outputs, targets, others): + """ This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + targets: list of dicts, such that len(targets) == batch_size.
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + text_embed = others['text_embed'] # num_dec_layers, num_sentence, dim + event_embed = others['event_embed'] # num_dec_layers, num_query, dim + dim = event_embed.shape[-1] + + # Retrieve the matching between the outputs of the last layer and the targets + # if self.opt.matcher_type == 'DTW': + # last_indices = self.matcher(text_embed[-1], event_embed[-1].reshape(-1, dim)) + # elif self.opt.matcher_type == 'Sim': + # last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim)) + # else: + # raise NotImplementedError('Align Criterion does not support:{}'.format(self.opt.matcher_type)) + #breakpoint() + last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim)) + outputs['matched_indices'] = last_indices + + num_boxes = sum(len(t["labels"]) for t in targets) + num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + # Compute all the requested losses + losses = {} + for loss in self.losses: + kwargs = {} + losses.update(self.get_loss(loss, outputs, targets, last_indices, num_boxes, **kwargs)) + + # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. + if 'aux_outputs' in outputs: + aux_indices = [] + for i, aux_outputs in enumerate(outputs['aux_outputs']): + indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim)) + aux_indices.append(indices) + for loss in self.losses: + kwargs = {} + if loss == 'labels': + # Logging is enabled only for the last layer + kwargs['log'] = False + l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs) + l_dict = {k + f'_{i}': v for k, v in l_dict.items()} + losses.update(l_dict) + + return losses, last_indices, aux_indices + return losses, last_indices + +class ContrastiveCriterion(nn.Module): + ''' + Contrastive loss between event feature and caption feature + ''' + + def __init__(self, temperature=0.1, enable_cross_video_cl=False, enable_e2t_cl=False, enable_bg_for_cl=False): + super().__init__() + self.temperature = temperature + self.enable_cross_video_cl = enable_cross_video_cl + self.enable_e2t_cl = enable_e2t_cl + self.enable_bg_for_cl = enable_bg_for_cl + + def forward_logits(self, text_embed, event_embed, bg_embed=None): + normalized_text_emb = F.normalize(text_embed, p=2, dim=1) + normalized_event_emb = F.normalize(event_embed, p=2, dim=1) + logits = torch.mm(normalized_text_emb, normalized_event_emb.t()) + if bg_embed is not None: + bg_logits = torch.sum(normalized_event_emb * F.normalize(bg_embed, p=2), dim=1) + logits = torch.cat((logits, bg_logits.unsqueeze(0)), dim=0) + return logits + + + def forward(self, text_embed, event_embed, matching_indices, return_logits=False, bg_embed=None): + + ''' + :param text_embed: [(event_num, contrastive_hidden_size)], len = batch size + total_event_number = sum of event number of each item in current batch + :param event_embed: (bsz, max_event_num, contrastive_hiddent_size), which need to be + expand in this function + :param matching_indices: (bsz, event_num) + ''' + batch_size, max_event_num, _ = event_embed.shape + event_embed, text_embed, gt_labels, gt_event_num = self._preprocess(event_embed, [text_embed], matching_indices) + raw_logits = 
self.forward_logits(text_embed, event_embed) + logits = raw_logits / self.temperature + + if self.enable_cross_video_cl: + t2e_loss = F.cross_entropy(logits, gt_labels) + if self.enable_e2t_cl: + gt_label_matrix = torch.zeros(len(text_embed) + 1, len(event_embed), device=text_embed.device) + gt_label_matrix[torch.arange(len(gt_labels)), gt_labels] = 1 + event_mask = gt_label_matrix.sum(dim=0) == 0 + gt_label_matrix[-1, event_mask] = 1 + e2t_gt_label = gt_label_matrix.max(dim=0)[1] + bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1) + e2t_logits = torch.cat((logits, bg_logits.unsqueeze(0) / self.temperature), dim=0) + if self.enable_bg_for_cl: + e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label) + else: + e2t_loss = F.cross_entropy(e2t_logits.t()[~event_mask], e2t_gt_label[~event_mask]) + loss = 0.5 * (t2e_loss + e2t_loss) + else: + loss = t2e_loss + else: + loss = 0 + base = 0 + for i in range(batch_size): + current_gt_event_num = gt_event_num[i] + current_logits = logits[base: base + current_gt_event_num, i * max_event_num: (i + 1) * max_event_num] + current_gt_labels = gt_labels[base: base + current_gt_event_num] + t2e_loss = F.cross_entropy(current_logits, current_gt_labels) + if self.enable_e2t_cl: + gt_label_matrix = torch.zeros(gt_event_num[i] + 1, max_event_num, device=text_embed.device) + # one-hot rows marking each matched event, as in the cross-video branch + gt_label_matrix[torch.arange(len(current_gt_labels)), current_gt_labels] = 1 + event_mask = gt_label_matrix.sum(dim=0) == 0 + e2t_gt_label = gt_label_matrix.max(dim=0)[1] + bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1) + e2t_logits = torch.cat((current_logits, bg_logits.unsqueeze(0) / self.temperature), dim=0) + if self.enable_bg_for_cl: + e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label) + else: + e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label, ignore_index=len(text_embed), reduction='sum') / (1e-5 + sum(~event_mask)) + loss += 0.5 * (t2e_loss + e2t_loss) + else: + loss += t2e_loss + base += current_gt_event_num + loss = loss / batch_size + if return_logits: + return loss, raw_logits + return loss + + + def _preprocess(self, event_embed, text_embed, matching_indices): + ''' + Flatten event_embed of a batch, get gt label + + :param matching_indices: [(event_num, )] len = bsz + ''' + batch_size, max_event_num, f_dim = event_embed.shape + gt_labels = [] + text_features = [] + gt_event_num = [] + event_features = event_embed.view(-1, f_dim) + for i in range(batch_size): + base = i * max_event_num if self.enable_cross_video_cl else 0 + feat_ids, cap_ids = matching_indices[i] + gt_event_num.append(len(feat_ids)) + text_features.append(text_embed[i][cap_ids]) + gt_labels.append(feat_ids + base) + text_features = torch.cat(text_features, dim=0) + gt_labels = torch.cat(gt_labels, dim=0) + gt_labels = gt_labels.to(event_embed.device) + + return event_features, text_features, gt_labels, gt_event_num + +def cross_entropy_with_gaussian_mask(inputs, targets, opt, weight): + gau_mask = opt.lloss_gau_mask + beta = opt.lloss_beta + + N_, max_seq_len = targets.shape + gaussian_mu = torch.arange(max_seq_len, device=inputs.device).unsqueeze(0).expand(max_seq_len, + max_seq_len).float() + x = gaussian_mu.transpose(0, 1) + gaussian_sigma = 2 + mask_dict = torch.exp(-(x - gaussian_mu) ** 2 / (2 * gaussian_sigma ** 2)) + _, ind = targets.max(dim=1) + mask = mask_dict[ind] + + loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none", weight=1 - weight) + if gau_mask: + coef = targets + ((1
- mask) ** beta) * (1 - targets) + else: + coef = targets + (1 - targets) + loss = loss * coef + loss = loss.mean(1) + return loss.mean() + +def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): + """ + Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + alpha: (optional) Weighting factor in range (0,1) to balance + positive vs negative examples. Default = -1 (no weighting). + gamma: Exponent of the modulating factor (1 - p_t) to + balance easy vs hard examples. + Returns: + Loss tensor + """ + + prob = inputs.sigmoid() + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") # with_logits func calculates sigmoid and CE jointly + p_t = prob * targets + (1 - prob) * (1 - targets) + loss = ce_loss * ((1 - p_t) ** gamma) + + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + loss = alpha_t * loss + + return loss.mean(1).sum() / num_boxes + +def regression_loss(inputs, targets, opt, weight): + inputs = F.relu(inputs) + 2 + max_id = torch.argmax(targets, dim=1) + if opt.regression_loss_type == 'l1': + loss = nn.L1Loss()(inputs[:, 0], max_id.float()) + elif opt.regression_loss_type == 'l2': + loss = nn.MSELoss()(inputs[:, 0], max_id.float()) + return loss \ No newline at end of file diff --git a/anet_clip/backup/pdvc/deformable_transformer.py b/anet_clip/backup/pdvc/deformable_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9b742061b166e0badc41db80f5423b0e46a746 --- /dev/null +++ b/anet_clip/backup/pdvc/deformable_transformer.py @@ -0,0 +1,496 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + +import copy +import math + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn.init import xavier_uniform_, constant_, normal_ + +from misc.detr_utils.misc import inverse_sigmoid +from pdvc.ops.modules import MSDeformAttn + + +class DeformableTransformer(nn.Module): + def __init__(self, d_model=256, nhead=8, + num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=1024, dropout=0.1, + activation="relu", return_intermediate_dec=False, + num_feature_levels=4, dec_n_points=4, enc_n_points=4, use_anchor=False): + super().__init__() + + self.d_model = d_model + self.nhead = nhead + self.use_anchor = use_anchor + + self.no_encoder = (num_encoder_layers == 0) + self.num_feature_levels = num_feature_levels + + encoder_layer = DeformableTransformerEncoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, enc_n_points) + self.encoder = DeformableTransformerEncoder(encoder_layer, num_encoder_layers) + + decoder_layer = DeformableTransformerDecoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, dec_n_points) + self.decoder = DeformableTransformerDecoder(decoder_layer, num_decoder_layers, return_intermediate_dec, d_model, use_anchor) + + self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model)) + + self.pos_trans = nn.Linear(d_model, d_model * 2) + self.pos_trans_norm = nn.LayerNorm(d_model * 2) + self.reference_points = nn.Linear(d_model, 1) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + for m in self.modules(): + if isinstance(m, MSDeformAttn): + m._reset_parameters() + # if not self.use_anchor: + xavier_uniform_(self.reference_points.weight.data, gain=1.0) + constant_(self.reference_points.bias.data, 0.) 
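+ # With Xavier weights and zero bias, the reference-point projection outputs + # values near 0, so initial reference points sit near 0.5 (the sequence + # midpoint) after the sigmoid; the level embeddings below start from a + # standard normal draw.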
+ normal_(self.level_embed) + + + def get_proposal_pos_embed(self, proposals): + num_pos_feats = 256 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + # N, L, 2 + proposals = proposals.sigmoid() * scale + # N, L, 2, 256 + pos = proposals[:, :, :, None] / dim_t + # N, L, 2, 128, 2 + pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()), dim=4).flatten(2) + return pos + + def get_proposal_pos_embed_1d(self, proposals): + num_pos_feats = 512 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + + # N, L + proposals = proposals.sigmoid() * scale + # N, L, 512 + pos = proposals[:, None] / dim_t + + pos = torch.stack((pos[:, 0::2].sin(), pos[:, 1::2].cos()), dim=2).flatten(1) + return pos + + def get_valid_ratio(self, mask): + valid_ratio_L = torch.sum(~mask, 1).float() / mask.shape[1] + return valid_ratio_L + + def prepare_encoder_inputs(self, srcs, masks, pos_embeds): + # prepare input for encoder + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + temporal_shapes = [] + for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)): + """ + lvl: (bs, ) + src: (bs, c, L ) + mask: (bs, L) + pos_embed: (bs, d_m, L) + """ + bs, c, L = src.shape + temporal_shapes.append(L) + src = src.transpose(1, 2) # (bs, L, c) + pos_embed = pos_embed.transpose(1, 2) # #(bs, L, d_m) + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + mask_flatten.append(mask) + src_flatten = torch.cat(src_flatten, 1) # (lvl_num, bs, wh, c) + mask_flatten = torch.cat(mask_flatten, 1) # (lvl_num, bs, wh) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) # (lvl_num, bs, wh, d_m) + temporal_shapes = torch.as_tensor(temporal_shapes, dtype=torch.long, device=src_flatten.device) # (lvl_num, 2) + level_start_index = torch.cat((temporal_shapes.new_zeros((1,)), temporal_shapes.cumsum(0)[ + :-1])) # prod: [w0h0, w0h0+w1h1, w0h0+w1h1+w2h2, ...] 
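+ # e.g. temporal_shapes = [100, 50, 25] gives level_start_index = [0, 100, 150], + # the offset of each level inside the flattened feature sequence.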
+ valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks], + 1) # (bs, lvl_num, 2), where 2 means (h_rate, and w_rate), all values <= 1 + + return src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten + + def forward_encoder(self, src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, + mask_flatten): + # encoder + if self.no_encoder: + memory = src_flatten + else: + memory = self.encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, + mask_flatten) + + return memory + + def prepare_decoder_input_query(self, memory, query_embed): + bs, _, _ = memory.shape + query_embed, tgt = torch.chunk(query_embed, 2, dim=1) + query_embed = query_embed.unsqueeze(0).expand(bs, -1, -1) + tgt = tgt.unsqueeze(0).expand(bs, -1, -1) + reference_points = self.reference_points(query_embed).sigmoid() # (bs, object_query, 1) + init_reference_out = reference_points # (bs, object_query, 1) + return init_reference_out, tgt, reference_points, query_embed + + def prepare_init_anchor_and_query(self, anchor_embed, hidden_dim, random_anchor_init=False, prior_anchor_duration_init=False, prior_duration=0.048): + num_queries = anchor_embed.weight.shape[0] + # query_embed = nn.Embedding(num_queries, hidden_dim) + if random_anchor_init: + anchor_embed.weight.data[:, :1] = torch.linspace(0, 1, num_queries).unsqueeze(1) + anchor_embed.weight.data[:, :1] = inverse_sigmoid(anchor_embed.weight.data[:, :1]) + print('Initilize the anchor center point with uniform distribution') + #self.anchor_embed.weight.data[:, :1].requires_grad = False # DAB-anchor set this to be False + anchor_embed.weight.data[:, :1].requires_grad = True # I set it to be True + # breakpoint() + if prior_anchor_duration_init: + # TODO: add prior anchor duration initialization, the below implementation is not correct + torch.nn.init.constant_(anchor_embed.weight.data[:, 1:], prior_duration) + anchor_embed.weight.data[:, 1:] = inverse_sigmoid(anchor_embed.weight.data[:, 1:]) + anchor_embed.weight.data[:, 1:].requires_grad = True + print('Initilize the anchor duration point with: {}'.format(prior_duration)) + reference_points = anchor_embed.weight.data.detach().clone().sigmoid().unsqueeze(0).expand(1, -1, -1) + topk_coords_unact = inverse_sigmoid(reference_points[0, :, 0]) + query_embed = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed_1d(topk_coords_unact))) # Position embedding receives non-sigmoided coordinates + # breakpoint() + return query_embed + + def prepare_decoder_input_anchor(self, memory, query_anchor): + bs, _, _ = memory.shape + query_embed, anchor = query_anchor + position_embedding, tgt = torch.chunk(query_embed, 2, dim=1) + position_embedding = position_embedding.unsqueeze(0).expand(bs, -1, -1) + tgt = tgt.unsqueeze(0).expand(bs, -1, -1) + reference_points = anchor.sigmoid().unsqueeze(0).expand(bs, -1, -1) # (bs, num_queries, 2) + # tgt = query_embed[..., :self.d_model] + # tgt = tgt.unsqueeze(0).expand(bs, -1, -1) # (bs, num_queries, query_dim) + init_reference_out = reference_points + + # topk_coords_unact = inverse_sigmoid(reference_points) + # position_embeding = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed_1d(topk_coords_unact))) + return init_reference_out, tgt, reference_points, position_embedding + + def prepare_decoder_input_prior(self, proposals, num_queries=100): + ''' + :param proposals: (batch, num_sentence, 2) + ''' + bs,_,_ = proposals.shape + # Uniformly generate normalized 
coordinates according to number of sentences + reference_points_list = [] + for i in range(bs): + # Generate N-1 points from 0~1 for each sentence uniformly + ns = proposals[i].shape[0] # number of sentences + reference_points_c = torch.linspace(0,1, 2*ns+1, dtype=torch.float32, device=proposals.device) + reference_points_c = reference_points_c[1:-1:2] # (num_sentence,) + reference_points_d = torch.Tensor([1.0/ns]).to(proposals.device).repeat(ns) # (num_sentence,) + reference_points = torch.stack([reference_points_c, reference_points_d], -1) # (num_sentence, 2) + # Padding the reference point to the same length + + num_query_per_sentence = num_queries // ns + reference_points = reference_points.repeat(1, num_query_per_sentence).reshape(-1,2) # (num_queries, 2) + if num_queries % ns != 0: # Padding with zeros + num_padding = num_queries - num_query_per_sentence * ns + padding = torch.Tensor([[1.0, 1.0/ns]]).to(proposals.device).repeat(num_padding, 1) + reference_points = torch.cat([reference_points, padding], 0) + reference_points_list.append(reference_points) + reference_points = torch.stack(reference_points_list, 0) # (batch, num_queries, 2) + init_reference_out = reference_points[:,:,:1] + topk_coords_unact = inverse_sigmoid(reference_points) + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) + return init_reference_out, tgt, reference_points[:,:,:1], query_embed + + def prepare_decoder_input_proposal(self, gt_reference_points): + ''' + :param gt_reference_points: (batch, num_sentence, 2) + ''' + #breakpoint() + topk_coords_unact = inverse_sigmoid(gt_reference_points) + reference_points = gt_reference_points + init_reference_out = reference_points + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) # Split to query_embed and position_embed (bs, num_sentence, hidden_dim, 2) + return init_reference_out, tgt, reference_points, query_embed + + def forward_decoder(self, *kargs): + hs, inter_references_out = self.decoder(*kargs) + return hs, inter_references_out + + +class DeformableTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, temporal_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, temporal_shapes, level_start_index, + padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + 
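+ +# A minimal, hypothetical sketch (not part of the model, assumptions noted +# inline) of how DeformableTransformerEncoder.get_reference_points behaves in +# the 1-D case for one sample whose mask is fully valid (valid_ratio == 1): +# each temporal level of length L_ contributes normalized cell centers +# (0.5, 1.5, ..., L_ - 0.5) / L_. +def _demo_reference_points_1d(temporal_shapes=(8, 4)): + points = [] + for L_ in temporal_shapes: + # centers of each temporal cell, normalized to [0, 1] + points.append((torch.arange(L_, dtype=torch.float32) + 0.5) / L_) + return torch.cat(points) # e.g. starts [0.0625, 0.1875, ...] for L_ = 8 +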
+class DeformableTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(temporal_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (L_) in enumerate(temporal_shapes): + ref = torch.linspace(0.5, L_ - 0.5, L_, dtype=torch.float32, device=device) + ref = ref.reshape(-1)[None] / (valid_ratios[:, None, lvl] * L_) + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * valid_ratios[:, None] + reference_points = reference_points[:,:,:,None] + return reference_points + + def forward(self, src, temporal_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(temporal_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, temporal_shapes, level_start_index, padding_mask) + + return output + + +class DeformableTransformerDecoderLayer(nn.Module): + def __init__(self, d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # cross attention + self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # self attention + self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout) + self.dropout2 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout3 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout4 = nn.Dropout(dropout) + self.norm3 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, tgt): + tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout4(tgt2) + tgt = self.norm3(tgt) + return tgt + + def forward(self, tgt, query_pos, reference_points, src, src_temporal_shapes, level_start_index, + src_padding_mask=None, query_mask=None): + # self attention + q = k = self.with_pos_embed(tgt, query_pos) + tgt2 = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), tgt.transpose(0, 1), key_padding_mask=~query_mask)[ + 0].transpose(0, 1) + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + + # cross attention + tgt2 = self.cross_attn(self.with_pos_embed(tgt, query_pos), + reference_points, + src, src_temporal_shapes, level_start_index, src_padding_mask) + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + + # ffn + tgt = self.forward_ffn(tgt) + return tgt + + +class DeformableTransformerDecoder(nn.Module): + def __init__(self, decoder_layer, num_layers, return_intermediate=False, d_model=256, use_anchor=False): + super().__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.return_intermediate = return_intermediate + # hack implementation for iterative bounding box refinement and two-stage Deformable DETR + self.bbox_head = None + self.use_anchor = use_anchor + self.d_model = d_model + # if use_anchor: + # self.anchor_head = MLP(d_model, d_model, d_model, 2) + # self.scale_head = MLP(d_model, d_model, d_model, 2) + + + def forward(self, tgt, reference_points, 
src, src_temporal_shapes, src_level_start_index, src_valid_ratios, + query_pos=None, src_padding_mask=None, query_padding_mask=None, disable_iterative_refine=False): + output = tgt + + intermediate = [] + intermediate_reference_points = [] + bs = tgt.shape[0] + for lid, layer in enumerate(self.layers): + if reference_points.shape[-1] == 2: + reference_points_input = reference_points[:, :, None] \ + * torch.stack([src_valid_ratios, src_valid_ratios], -1)[:, None] + else: + assert reference_points.shape[-1] == 1 + reference_points_input = reference_points[:, :, None] * src_valid_ratios[:, None, :, None] + # if self.use_anchor: + # query_sine_embed = gen_sineembed_for_position(reference_points_input[:,:,0,:], self.d_model) + # raw_query_pos = self.anchor_head(query_sine_embed) # num_query, bs, 256 + # query_scale_embed = self.scale_head(output) if lid != 0 else 1 + # query_pos = query_scale_embed * raw_query_pos + output = layer(output, query_pos, reference_points_input, src, src_temporal_shapes, src_level_start_index, + src_padding_mask, query_padding_mask) + + if self.use_anchor: + assert reference_points.shape[-1] == 2 + + # hack implementation for iterative bounding box refinement + if disable_iterative_refine: + reference_points = reference_points + else: + if (self.bbox_head is not None): + tmp = self.bbox_head[lid](output) + if reference_points.shape[-1] == 2: + new_reference_points = tmp + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + else: + assert reference_points.shape[-1] == 1 + new_reference_points = tmp + new_reference_points[..., :1] = tmp[..., :1] + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + reference_points = new_reference_points.detach() + else: + reference_points = reference_points + + if self.return_intermediate: + intermediate.append(output) + intermediate_reference_points.append(reference_points) + # breakpoint() + + if self.return_intermediate: + return torch.stack(intermediate), torch.stack(intermediate_reference_points) + + return output, reference_points + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") + + +def gen_sineembed_for_position(pos_tensor, d_model): + # n_query, bs, _ = pos_tensor.size() + # sineembed_tensor = torch.zeros(n_query, bs, 256) + hidden_dim = d_model // 2 + scale = 2 * math.pi + dim_t = torch.arange(hidden_dim, dtype=torch.float32, device=pos_tensor.device) + dim_t = 10000 ** (2 * (dim_t // 2) / hidden_dim) + x_embed = pos_tensor[:, :, 0] * scale + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + if pos_tensor.size(-1) == 1: + pos = pos_x + elif pos_tensor.size(-1) == 2: + w_embed = pos_tensor[:, :, 1] * scale + pos_w = w_embed[:, :, None] / dim_t + pos_w = torch.stack((pos_w[:, :, 0::2].sin(), pos_w[:, :, 1::2].cos()), dim=3).flatten(2) + + pos = torch.cat((pos_x, pos_w), dim=2) + else: + raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1))) + return pos + +class MLP(nn.Module): + """ Very simple multi-layer perceptron (also called FFN)""" + + def __init__(self, input_dim, hidden_dim, output_dim, 
num_layers): + super().__init__() + self.num_layers = num_layers + h = [hidden_dim] * (num_layers - 1) + self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + +def build_deforamble_transformer(args): + return DeformableTransformer( + d_model=args.hidden_dim, + nhead=args.nheads, + num_encoder_layers=args.enc_layers, + num_decoder_layers=args.dec_layers, + dim_feedforward=args.transformer_ff_dim, + dropout=args.transformer_dropout_prob, + activation="relu", + return_intermediate_dec=True, + num_feature_levels=args.num_feature_levels, + dec_n_points=args.dec_n_points, + enc_n_points=args.enc_n_points, + use_anchor=args.use_anchor) diff --git a/anet_clip/backup/pdvc/dp/CFSA.py b/anet_clip/backup/pdvc/dp/CFSA.py new file mode 100644 index 0000000000000000000000000000000000000000..135defd0c1a48435405a27e2cc12532d86b5d79a --- /dev/null +++ b/anet_clip/backup/pdvc/dp/CFSA.py @@ -0,0 +1,327 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, repeat +from torch.nn import CrossEntropyLoss +import math + +def compute_cls_loss(pred, labels, use_cosface=False): + if use_cosface: + # CosFace Loss + s, m = 30.0, 0.4 + cos_value = torch.diagonal(pred.transpose(0, 1)[labels]) + numerator = s * (cos_value - m) + excl = torch.cat([torch.cat((pred[i, :y], pred[i, y + 1:])).unsqueeze(0) for i, y in enumerate(labels)], dim=0) + denominator = torch.exp(numerator) + torch.sum(torch.exp(s * excl), dim=1) + L = numerator - torch.log(denominator) + loss = -torch.mean(L) + else: + # Softmax Loss + criterion = CrossEntropyLoss().cuda() + loss = criterion(pred, labels) + + return loss + + +def frame_blank_align_loss(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + blank2 = seq_features2[:, :1] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + K = 2 * step_num + 1 + sparse_seq_features2 = torch.cat((blank2, seq_features2[:, [5, 7, 8, 9, 11, 12, 13, 14], :]), dim=1) + pred = (torch.einsum('bic,bjc->bij', seq_features1, sparse_seq_features2) / math.sqrt(C)).log_softmax(-1) + + D_pre = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_pre[:, 0] = pred[:, 0, 0] + D_pre[:, 1] = pred[:, 0, 1] + + for t in range(1, T): + D_cur = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_cur[:, 0] = D_pre[:, 0] + pred[:, t, 0] + D_cur[:, 1] = torch.logsumexp(torch.stack([D_pre[:, 0], D_pre[:, 1]]), dim=0) + pred[:, t, 1] + + # blank term + blank_pre_ind = torch.arange(1, K, 2)[None, :].repeat(B, 1) + blank_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_pre_ind] + + blank_cur_ind = torch.arange(2, K, 2)[None, :].repeat(B, 1) + blank_cur = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_cur_ind] + + blank_log_prob = torch.logsumexp(torch.stack([blank_pre, blank_cur]), dim=0) + D_cur[:, 2:][:, ::2] = blank_log_prob + pred[:, t, 0][:, None].repeat(1, blank_log_prob.shape[-1]) + + # step term + step_prepre_ind = torch.arange(1, K, 2)[None, :-1].repeat(B, 1) + step_prepre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_prepre_ind] + + step_pre_ind = torch.arange(2, K, 2)[None, :-1].repeat(B, 1) + step_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_pre_ind] + + step_cur_ind = torch.arange(3, K, 2)[None, :].repeat(B, 1) + step_cur 
= D_pre[torch.arange(B, device=device).unsqueeze(-1), step_cur_ind] + + step_log_prob = torch.logsumexp(torch.stack([step_prepre, step_pre, step_cur]), dim=0) + D_cur[:, 2:][:, 1::2] = step_log_prob + pred[:, t, 2:] + D_pre = D_cur + + fsa_distance = -torch.logsumexp(D_cur[:, -2:], dim=-1) / 13 + loss = fsa_distance.mean(0) + + return loss + + +def consist_step_mining(seq_features1, seq_features2, step_num): + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num - 1 + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k >= 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + repeat_times1 = (segment1[:, 1:] - segment1[:, :-1]).flatten() + repeat_target1 = torch.arange(step_num, device=device).repeat((B, )) + step_index1 = repeat_target1.repeat_interleave(repeat_times1).reshape(B, T) + + repeat_times2 = (segment2[:, 1:] - segment2[:, :-1]).flatten() + repeat_target2 = torch.arange(step_num, device=device).repeat((B, )) + step_index2 = repeat_target2.repeat_interleave(repeat_times2).reshape(B, T) + + div_term = torch.exp(torch.arange(0, C, 2, device=device) * -(math.log(10000.0) / C)) + + pos_emb1 = torch.zeros(B, T, C, device=device) + pos_emb1[:, :, 0::2] = torch.sin(step_index1.unsqueeze(-1) * div_term) + pos_emb1[:, :, 1::2] = torch.cos(step_index1.unsqueeze(-1) * div_term) + + pos_emb2 = torch.zeros(B, T, C, device=device) + pos_emb2[:, :, 0::2] = torch.sin(step_index2.unsqueeze(-1) * div_term) + pos_emb2[:, :, 1::2] = torch.cos(step_index2.unsqueeze(-1) * div_term) + + return pos_emb1, pos_emb2, segment1[:, :-1]+1, segment2[:, :-1]+1 + + + +def consist_step_mining_train(seq_features1, seq_features2, step_num, pair_labels): + # seq_features1 = seq_features1[:, 1:] + # seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / 
torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1) + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = tmp.flatten(3).max(-1).values + D_ind[:, k] = tmp.flatten(3).max(-1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + final_result = D[:, :, T-1, T-1] + + video_seg1 = segment1[:, :-1] + 1 + video_seg2 = segment2[:, :-1] + 1 + + # loss_step = (-(pair_labels * final_result.max(dim=-1).values)).sum() + loss_step = -(pair_labels * final_result.max(dim=-1).values).mean() + + return loss_step, video_seg1, video_seg2 + + + +def consist_step_mining_inference(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + # pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + return segment1[:, :-1] + 1, segment2[:, :-1] + 1 + + +def step_align_loss(seq_features1, seq_features2): + B, T, C = seq_features1.shape + # the similarity matrix: 16 * 16 + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = 
torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=seq_features1.device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=pred.device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + i, j, a, b = torch.meshgrid(*[torch.arange(T, device=seq_features1.device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((a >= i) | (b >= j)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + # tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + final_result = D[:, :, T-1, T-1] + return -(final_result.max(dim=-1).values).mean(), final_result.max(dim=-1).indices, D_ind + + +def single_align_loss(seq_features1, seq_features2): + device = seq_features1.device + T, C = seq_features1.shape + pred = (torch.einsum('ic,jc->ij', seq_features1, seq_features2) / math.sqrt(C)).log_softmax(-1) + + ZERO_PAD = torch.zeros((1), device=device) + ONE_PAD = torch.ones((1), device=device) + S = seq_features2.shape[0] + + target = (torch.arange(S, device=device)) + + D_TABLE = ONE_PAD.log() + for t in range(T): + D_VEC_1 = torch.logsumexp(torch.stack([D_TABLE[1:t+1], D_TABLE[:-1][:t]]), 0) + pred[t, target[:t]] + D_VEC_2 = D_TABLE[t:t+1] + pred[t, target[t:t+1]] + D_TABLE = torch.cat([ZERO_PAD.log(), D_VEC_1, D_VEC_2], dim=-1) + # changed by hotel: remove " / s" + ctc_distance = -D_TABLE[S] + return ctc_distance + + +def frame2varstep_loss(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1).mean(-1) + + +def frame2varstep_dist(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) + + +def frame2learnedstep_dist(frame_feats1, step_feats2): + B, T, C = frame_feats1.shape + losses = [] + for batch in range(B): + frame_feat1 = frame_feats1[batch] + step_feat2 = step_feats2[batch] + # step_feat2 = step_feat2[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]] + frame_loss = single_align_loss(frame_feat1, step_feat2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) diff --git a/anet_clip/backup/pdvc/dp/__init__.py b/anet_clip/backup/pdvc/dp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc 
b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd28dccf2f11d713b40d4e237cb5a055bf54ca5d Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1197f07fc41ae6f41b581ebd13f30b674234acf4 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24490f9a2f7cc151dc46f67b4d4ae214dba5c47a Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b35d663b4275176bf9f37c5dff954afd66df0e6 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fe93fd162f629560d23a2791ff3dab2c276d70c Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f2ae8f9d246202b485f89aa690174225dc2e66e Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7d1ad496851d504c4b5de3cabed3465262cf89 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af2c6ca1bfc47fc34f69aaeee119c1c439fdea4b Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..214dc29706641783b09e447117f540f723ec6868 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d84ee83c249b2c327db4180485c62581e0bcb345 Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ae95ac7acddab327941068e44fcc974789c6d059
Binary files /dev/null and b/anet_clip/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc differ
diff --git a/anet_clip/backup/pdvc/dp/dp_utils.py b/anet_clip/backup/pdvc/dp/dp_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1dcdb6e6cb0385b1862aff36c779cdda89cf563
--- /dev/null
+++ b/anet_clip/backup/pdvc/dp/dp_utils.py
@@ -0,0 +1,402 @@
+import numpy as np
+import torch
+import math
+
+from itertools import product
+from torch import log, exp
+import torch.nn.functional as F
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+def compute_all_costs(
+    z_features,
+    x_features,
+    gamma_xz,
+    drop_cost_type,
+    keep_percentile,
+    l2_normalize=False,
+    given_baseline_logits=None,
+    return_baseline=False,
+):
+    """Computes pairwise match costs and per-clip drop costs used in Drop-DTW.
+
+    Parameters
+    ----------
+    z_features: torch.Tensor [K, d]
+        step (e.g., sentence) features
+    x_features: torch.Tensor [N, d]
+        video clip features
+    gamma_xz: float
+        softmax temperature for turning similarities into probabilities
+    drop_cost_type: str
+        the type of drop cost definition, e.g., learnable or logits percentile
+    keep_percentile: float in [0, 1]
+        if drop_cost_type == 'logit', defines the drop (keep) cost threshold as a logits percentile
+    l2_normalize: bool
+        whether to normalize clip and step features before computing the costs
+    """
+
+    if l2_normalize:
+        x_features = F.normalize(x_features, p=2, dim=1)
+        z_features = F.normalize(z_features, p=2, dim=1)
+
+    sim = z_features @ x_features.T
+
+    if drop_cost_type == "logit":
+        if keep_percentile > 1:
+            baseline_logit = sim.min().detach() - 1
+        else:
+            k = max([1, int(torch.numel(sim) * keep_percentile)])
+            baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+        baseline_logits = baseline_logit.repeat([1, sim.shape[1]])  # making it of shape [1, N]
+        sims_ext = torch.cat([sim, baseline_logits], dim=0)
+    else:
+        assert False, f"No such drop mode {drop_cost_type}"
+
+    softmax_sims = torch.nn.functional.softmax(sims_ext / gamma_xz, dim=0)
+    matching_probs, drop_probs = softmax_sims[:-1], softmax_sims[-1]
+    zx_costs = -torch.log(matching_probs + 1e-5)
+    drop_costs = -torch.log(drop_probs + 1e-5)
+    return zx_costs, drop_costs, drop_probs
+
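A quick usage sketch of `compute_all_costs` (toy tensors, values made up): the similarity matrix is extended with one baseline row pinned at the keep-percentile logit, and a column-wise softmax turns it into match and drop probabilities, so clips whose best step similarity falls below the baseline become cheap to drop.

```python
import torch

torch.manual_seed(0)
z = torch.randn(4, 32)     # K=4 step features (toy)
x = torch.randn(12, 32)    # N=12 clip features (toy)
zx_costs, drop_costs, drop_probs = compute_all_costs(
    z, x, gamma_xz=10.0, drop_cost_type="logit", keep_percentile=0.3, l2_normalize=True
)
print(zx_costs.shape, drop_costs.shape)    # torch.Size([4, 12]) torch.Size([12])
```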
+def compute_double_costs(
+    z_features,
+    x_features,
+    gamma_xz,
+    drop_cost_type,
+    keep_percentile,
+    l2_normalize=False,
+    return_baseline=False,
+):
+    """Computes pairwise match costs and drop costs for both sequences, used in double Drop-DTW.
+
+    Parameters
+    ----------
+    z_features: torch.Tensor [K, d]
+        step (e.g., sentence) features
+    x_features: torch.Tensor [N, d]
+        video clip features
+    gamma_xz: float
+        softmax temperature for turning similarities into probabilities
+    drop_cost_type: str
+        the type of drop cost definition, e.g., learnable or logits percentile
+    keep_percentile: float in [0, 1]
+        if drop_cost_type == 'logit', defines the drop (keep) cost threshold as a logits percentile
+    l2_normalize: bool
+        whether to normalize clip and step features before computing the costs
+    """
+
+    z_features, frame_features = z_features, x_features
+    if l2_normalize:
+        x_features = F.normalize(frame_features, p=2, dim=1)
+        z_features = F.normalize(z_features, p=2, dim=1)
+    sim = z_features @ x_features.T
+
+    if drop_cost_type == "logit":
+        k = max([1, int(torch.numel(sim) * keep_percentile)])
+        baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    else:
+        assert False, f"No such drop mode {drop_cost_type}"
+    sim_ext = F.pad(sim, (0, 1, 0, 1), value=baseline_logit)
+
+    softmax_sims = torch.nn.functional.softmax(sim_ext.reshape(-1) / gamma_xz, dim=0).reshape(sim_ext.shape)
+    matching_probs, x_drop_probs, z_drop_probs = softmax_sims[:-1, :-1], softmax_sims[-1, :-1], softmax_sims[:-1, -1]
+    zx_costs = -torch.log(matching_probs + 1e-5)
+    x_drop_costs = -torch.log(x_drop_probs + 1e-5)
+    z_drop_costs = -torch.log(z_drop_probs + 1e-5)
+    return zx_costs, x_drop_costs, z_drop_costs
+
+
+class VarTable:
+    def __init__(self, dims, dtype=torch.float, device=device):
+        self.dims = dims
+        d1, d2, d_rest = dims[0], dims[1], dims[2:]
+
+        self.vars = []
+        for i in range(d1):
+            self.vars.append([])
+            for j in range(d2):
+                var = torch.zeros(d_rest).to(dtype).to(device)
+                self.vars[i].append(var)
+
+    def __getitem__(self, pos):
+        i, j = pos
+        return self.vars[i][j]
+
+    def __setitem__(self, pos, new_val):
+        i, j = pos
+        if self.vars[i][j].sum() != 0:
+            assert False, "This cell has already been assigned. There must be a bug somewhere."
+        else:
+            self.vars[i][j] = self.vars[i][j] + new_val
+
+    def show(self):
+        device, dtype = self[0, 0].device, self[0, 0].dtype
+        mat = torch.zeros(self.dims, dtype=dtype, device=device)
+        for dims in product(*[range(d) for d in self.dims]):
+            i, j, rest = dims[0], dims[1], dims[2:]
+            mat[dims] = self[i, j][rest]
+        return mat
+
+
+def minGamma(inputs, gamma=1, keepdim=True):
+    """continuous relaxation of min defined in the D3TW paper"""
+    if type(inputs) == list:
+        if inputs[0].shape[0] == 1:
+            inputs = torch.cat(inputs)
+        else:
+            inputs = torch.stack(inputs, dim=0)
+
+    if gamma == 0:
+        minG = inputs.min(dim=0, keepdim=keepdim)
+    else:
+        # log-sum-exp stabilization trick
+        zi = -inputs / gamma
+        max_zi = zi.max()
+        log_sum_G = max_zi + log(exp(zi - max_zi).sum(dim=0, keepdim=keepdim) + 1e-5)
+        minG = -gamma * log_sum_G
+    return minG
+
+
+def minProb(inputs, gamma=1, keepdim=True):
+    if type(inputs) == list:
+        if inputs[0].shape[0] == 1:
+            inputs = torch.cat(inputs)
+        else:
+            inputs = torch.stack(inputs, dim=0)
+
+    if gamma == 0:
+        minP = inputs.min(dim=0, keepdim=keepdim)
+    else:
+        probs = F.softmax(-inputs / gamma, dim=0)
+        minP = (probs * inputs).sum(dim=0, keepdim=keepdim)
+    return minP
+
+
+def prob_min(values, gamma_min, logits=None):
+    logits = values if logits is None else logits
+    assert len(logits) == len(values), "Values and prob logits are of different length"
+
+    if len(values) > 1:
+        values = torch.cat(values, dim=-1)
+        logits = torch.cat(logits, dim=-1)
+    else:
+        values = values[0]
+        logits = logits[0]
+
+    if gamma_min > 0:
+        probs = F.softmax(-logits / gamma_min, dim=-1)
+    else:
+        probs = F.one_hot(logits.argmin(), logits.size(-1))
+
+    if values.dim() > probs.dim():
+        probs = probs[..., None, :]
+
+    out = (values * probs).sum(-1).to(values.dtype)
+    return out
+
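`minGamma` is the usual soft-min, min_gamma(a) = -gamma * log sum_i exp(-a_i / gamma), which approaches the hard minimum from below as gamma goes to 0. A short sanity check with toy values:

```python
import torch

a = torch.tensor([3.0, 1.0, 2.0])
for gamma in (1.0, 0.1, 0.01):
    soft_min = -gamma * torch.logsumexp(-a / gamma, dim=0)
    print(gamma, soft_min.item())   # 0.5924, then ~1.0000 -> approaches min(a) = 1.0
```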
+def list_min(values, keys=None):
+    keys = values if keys is None else keys
+    assert len(keys) == len(values), "Values and prob logits are of different length"
+
+    if values[0].dim() == keys[0].dim() + 1:
+        dim = -2
+    else:
+        dim = -1
+
+    if len(values) > 1:
+        values = torch.cat(values, dim=dim)
+        keys = torch.cat(keys, dim=-1)
+    else:
+        values = values[0]
+        keys = keys[0]
+
+    onehot = F.one_hot(keys.argmin(-1), keys.size(-1))
+    if values.dim() > keys.dim():
+        onehot = onehot[..., None]
+    out = (values * onehot).sum(dim).to(values.dtype)
+    return out
+
+
+def traceback(D):
+    i, j = np.array(D.shape) - 2
+    p, q = [i], [j]
+    while (i > 0) or (j > 0):
+        tb = np.argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
+        if tb == 0:
+            i -= 1
+            j -= 1
+        elif tb == 1:
+            i -= 1
+        else:  # (tb == 2):
+            j -= 1
+        p.insert(0, i)
+        q.insert(0, j)
+    return np.array(p), np.array(q)
+
+
+def diag_to_mat(diags, K, N):
+    mat = np.zeros([K, N]) - 123
+    for d in range(len(diags)):
+        for r, v in enumerate(diags[d]):
+            j = min(d, N - 1) - r
+            i = d - j
+            mat[i, j] = v if v < 1e8 else np.inf
+    return mat
+
+
+def pad_costs(zx_costs_list, drop_costs_list):
+    B = len(zx_costs_list)
+    Ns, Ks = [], []
+    for i in range(B):
+        Ki, Ni = zx_costs_list[i].shape
+        if Ki >= Ni:
+            # in case the number of steps is greater than the number of frames,
+            # duplicate every frame and let the drops do the job.
+            mult = math.ceil(Ki / Ni)
+            zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1])
+            drop_costs_list[i] = torch.stack([drop_costs_list[i]] * mult, dim=-1).reshape([-1])
+            Ni *= mult
+        Ns.append(Ni)
+        Ks.append(Ki)
+    N, K = max(Ns), max(Ks)
+
+    # preparing padded tables
+    padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], []
+    for i in range(B):
+        zx_costs = zx_costs_list[i]
+        drop_costs = drop_costs_list[i]
+        cum_drop_costs = torch.cumsum(drop_costs, dim=0)
+
+        # padding everything to the size of the largest N and K
+        row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device)
+        padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad]))
+        padded_drop_costs.append(torch.cat([drop_costs, row_pad]))
+        multirow_pad = torch.stack([row_pad + 9999999999] * Ks[i], dim=0)
+        padded_table = torch.cat([zx_costs, multirow_pad], dim=1)
+        rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + 9999999999
+        padded_table = torch.cat([padded_table, rest_pad], dim=0)
+        padded_zx_costs.append(padded_table)
+    return padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks
+
+
+def get_diag_coord_grid(B, d_len, num_states, d_idx):
+    """
+    B - batch size
+    d_len - number of elements in the diagonal
+    num_states - number of states in the DP table
+    d_idx - index of the diagonal, used for marking
+    """
+    r = torch.arange(d_len)
+    s = torch.arange(num_states)
+    d = torch.ones(d_len, num_states) * d_idx
+    mg = torch.stack([d, *torch.meshgrid(r, s)], dim=-1)[None, ...].repeat([B, 1, 1, 1])
+    return mg
+
+
+def diag_traceback(pointer, N, paths):
+    # getting rid of unnecessary elements in the batch
+    pointer = [int(l.item()) for l in pointer]
+    d, r, s = pointer
+    traceback = [pointer]
+    while d > 0:
+        new_pointer = [int(l.item()) for l in paths[d][r, s]]
+        traceback.append(new_pointer)
+        d, r, s = new_pointer
+
+    # transform to rectangular coordinates
+    rectangular_traceback = []
+    for d, r, s in traceback:
+        i = r + max(0, d - N + 1)
+        j = d - i
+        if i > 0 and j > 0:
+            rectangular_traceback.append((i, j, s))
+
+    return traceback, rectangular_traceback
+
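The batched machines later in `exact_dp.py` sweep their DP tables by anti-diagonals so that every cell on one diagonal can be updated in parallel; `diag_to_mat`, `diag_traceback`, and `get_diag_coord_grid` all rely on the same (d, r) to (i, j) convention. A small illustration of that mapping (my own enumeration, mirroring `j = min(d, N - 1) - r` above):

```python
K, N = 3, 4
for d in range(K + N - 1):                 # anti-diagonals of a K x N table, d = i + j
    cells = []
    for r in range(min(d, N - 1) + 1):     # r = position along the diagonal
        j = min(d, N - 1) - r
        i = d - j
        if i < K:
            cells.append((i, j))
    print(d, cells)
# d=0 -> [(0, 0)], d=1 -> [(0, 1), (1, 0)], ..., d=5 -> [(2, 3)]
```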
+def nw_diag_traceback(d, r, N, paths):
+    d, r = int(d.item()), int(r.item())
+    traceback = []
+    while d > 0:
+        d_1, s_1, s = [int(l.item()) for l in paths[d][r, 0]]
+        traceback.append((d, r, s))
+        d, r = d_1, s_1
+
+    # transform to rectangular coordinates
+    rectangular_traceback = []
+    for d, r, s in traceback:
+        i = r + max(0, d - N + 1)
+        j = d - i
+        if i > 0 and j > 0:
+            rectangular_traceback.append((i, j, s))
+
+    return traceback, rectangular_traceback
+
+
+def compute_symmetric_cost(sim, keep_percentile=0.3):
+    k = max([1, int(torch.numel(sim) * keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])  # making it of shape [1, N]
+    zx_costs = -sim
+    x_drop_costs = -baseline_logits.squeeze()
+    z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze()
+    return zx_costs, x_drop_costs, z_drop_costs
+
+
+#============ Hack from model_utils.py in StepFormer ============#
+
+
+def unique_softmax(sim, labels, gamma=1, dim=0):
+    assert sim.shape[0] == labels.shape[0]
+    labels = labels.detach().cpu().numpy()
+    _, unique_index, unique_inverse_index = np.unique(labels, return_index=True, return_inverse=True)
+    unique_sim = sim[unique_index]
+    unique_softmax_sim = torch.nn.functional.softmax(unique_sim / gamma, dim=dim)
+    softmax_sim = unique_softmax_sim[unique_inverse_index]
+    return softmax_sim
+
+
+def compute_masked_sims(z, x, z_pad_mask, x_pad_mask, l2_normalize=False, softmax_dim=None, gamma=None):
+    # z ~ [B, K, d], x ~ [B, N, d]
+    if l2_normalize:
+        z, x = F.normalize(z, dim=-1), F.normalize(x, dim=-1)
+    pad_sims = torch.einsum("bkd,bnd->bkn", z, x)
+    masked_sims = []
+    for i in range(x.shape[0]):
+        masked_sim = pad_sims[i]
+        masked_sim = masked_sim if z_pad_mask is None else masked_sim[~z_pad_mask[i], :]
+        masked_sim = masked_sim if x_pad_mask is None else masked_sim[:, ~x_pad_mask[i]]
+        if softmax_dim is not None:
+            masked_sim = F.softmax(masked_sim / gamma, dim=softmax_dim)
+        masked_sims.append(masked_sim)
+    return masked_sims
+
+
+def compute_sim(z, x, l2_norm):
+    if l2_norm:
+        return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T
+    else:
+        return z @ x.T
+
+
+def cosine_sim(x, z):
+    cos_sim_fn = torch.nn.CosineSimilarity(dim=1)
+    return cos_sim_fn(x[..., None], z.T[None, ...])
+
+
+def cos_dist(x, z):
+    cos_sim_fn = torch.nn.CosineSimilarity(dim=1)
+    return (1 - cos_sim_fn(x[..., None], z.T[None, ...])) / 2
+
+
+def l2_dist(x, z):
+    # per-row squared norms, so the result is the full pairwise distance matrix
+    dist_squared = (x**2).sum(-1, keepdim=True) + (z**2).sum(-1)[None, :] - 2 * x @ z.T
+    return torch.clamp(dist_squared, min=0).sqrt()
+
+
+def cos_loglikelihood(x, z, gamma=0.1, z_dim=1):
+    cos_sim = cosine_sim(x, z)
+    probs = F.softmax(cos_sim / gamma, dim=z_dim)
+    return torch.log(probs)
\ No newline at end of file
diff --git a/anet_clip/backup/pdvc/dp/exact_dp.py b/anet_clip/backup/pdvc/dp/exact_dp.py
new file mode 100644
index 0000000000000000000000000000000000000000..ada874b89a60799af867aab82357c8d7b442348d
--- /dev/null
+++ b/anet_clip/backup/pdvc/dp/exact_dp.py
@@ -0,0 +1,1123 @@
+import torch
+import numpy as np
+import torch.nn.functional as F
+from functools import partial
+from copy import copy
+
+# from dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min
+from pdvc.dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min
+
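`crosstask_dp` below runs on an extended label sequence of length 2K+1 that interleaves a background state before, between, and after the K steps, much like the blank symbol in CTC; its inner `get_step` maps an extended index back to a step id (0 meaning background). A tiny illustration with a toy K:

```python
# K = 3 steps -> extended states [bg, s1, bg, s2, bg, s3, bg]
def get_step(k):
    return 0 if k % 2 == 0 else int((k + 1) / 2)

print([get_step(k) for k in range(2 * 3 + 1)])   # [0, 1, 0, 2, 0, 3, 0]
```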
+def crosstask_dp(cost_matrix, exactly_one=True, bg_cost=0):
+    "Algorithm used in Cross-Task to calculate Recall"
+
+    def get_step(k):
+        return 0 if k % 2 == 0 else int((k + 1) / 2)
+
+    T = cost_matrix.shape[0]
+    K = cost_matrix.shape[1]
+    K_ext = int(2 * K + 1)
+
+    L = -np.ones([T + 1, K_ext], dtype=float)
+    P = -np.ones([T + 1, K_ext], dtype=float)
+    L[0, 0] = 0
+    P[0, 0] = 0
+
+    for t in range(1, T + 1):
+        Lt = L[t - 1, :]
+        Pt = P[t - 1, :]
+        for k in range(K_ext):
+            s = get_step(k)
+            opt_label = -1
+
+            j = k
+            if (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1 and (s == 0 or not exactly_one):
+                opt_label = j
+                opt_value = Lt[j]
+
+            j = k - 1
+            if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1:
+                opt_label = j
+                opt_value = L[t - 1][j]
+
+            if s != 0:
+                j = k - 2
+                if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1:
+                    opt_label = j
+                    opt_value = Lt[j]
+
+            if s != 0:
+                L[t, k] = opt_value + cost_matrix[t - 1][s - 1]
+            else:
+                L[t, k] = opt_value + bg_cost
+            P[t, k] = opt_label
+
+    labels = np.zeros_like(cost_matrix)
+    if L[T, K_ext - 1] < L[T, K_ext - 2] or (P[T, K_ext - 2] == -1):
+        k = K_ext - 1
+    else:
+        k = K_ext - 2
+    for t in range(T, 0, -1):
+        s = get_step(k)
+        if s > 0:
+            labels[t - 1, s - 1] = 1
+        k = P[t, k].astype(int)
+    return labels
+
+
+def iou_based_matching(pred_seg, gt_seg, pred_step_ids, gt_step_ids, ignore_class=True):
+    """Performs the matching of predicted and gt sequence segments"""
+    pred_segments = torch.stack([pred_seg == idx for idx in pred_step_ids], 0)  # [N_pred, T]
+    gt_segments = torch.stack([gt_seg == idx for idx in gt_step_ids], 0)  # [N_gt, T]
+    intersection = (
+        torch.logical_and(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1)
+    )  # [N_pred, N_gt]
+    union = torch.logical_or(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1)  # [N_pred, N_gt]
+    iou = intersection / (union + 1e-5)  # [N_pred, N_gt]
+
+    C = -iou.detach().cpu().numpy().T  # [N_gt, N_pred]
+    if not ignore_class:
+        print("Not ignoring class")
+        is_same_step_id = pred_step_ids.unsqueeze(1) == gt_step_ids.unsqueeze(0)  # [N_pred, N_gt]
+        if is_same_step_id.shape == (1, 1):
+            C[0, 0] += 9999 * (~is_same_step_id[0, 0])
+        else:
+            C[~is_same_step_id] = 9999
+
+    x_drop, z_drop = np.zeros(C.shape[1]), np.zeros(C.shape[0])
+    labels = double_drop_dtw(C, x_drop, z_drop, one_to_many=False, many_to_one=False, return_labels=True) - 1
+    indices = (np.arange(len(labels))[labels > -1], labels[labels > -1])
+    return [torch.as_tensor(i, dtype=torch.int64) for i in indices]
+
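The next function, `drop_dtw`, keeps two values per DP cell: state 0 (clip matched to the current step) and state 1 (clip dropped at its per-clip cost). A toy instance makes the intended optimum concrete (my own numbers, not from the diff):

```python
import numpy as np

# One step, four clips: clips 1 and 2 fit the step, clips 0 and 3 are off-topic.
zx = np.array([[2.0, 0.1, 0.2, 3.0]])    # match costs, shape [K=1, N=4]
drop = np.array([0.5, 0.5, 0.5, 0.5])    # per-clip drop costs
# Optimal alignment: drop clip 0, match the contiguous run {1, 2}, drop clip 3,
# for a total cost of 0.5 + 0.1 + 0.2 + 0.5 = 1.3.
```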
+def drop_dtw(zx_costs, drop_costs, exclusive=True, contiguous=True, one_to_one=False, return_labels=False):
+    """Drop-DTW algorithm that allows drops only from one (video) side. See Algorithm 1 in the paper.
+
+    Parameters
+    ----------
+    zx_costs: np.ndarray [K, N]
+        pairwise match costs between K steps and N video clips
+    drop_costs: np.ndarray [N]
+        drop costs for each clip
+    exclusive: bool
+        if True, any clip can be matched with only one step, not many
+    contiguous: bool
+        if True, can only match a contiguous sequence of clips to a step
+        (i.e. no drops in between the clips)
+    return_labels: bool
+        if True, returns output directly useful for segmentation computation (made for convenience)
+    """
+    K, N = zx_costs.shape
+
+    # D: the dynamic programming table, which records the intermediate costs
+    # P: the path tracking table, which records the previous location and state (zi, xi, prev_state)
+
+    # initialize solution matrices
+    D = np.zeros([K + 1, N + 1, 2])  # the last dimension corresponds to the two states:
+    # state 0 - x is matched; state 1 - x is dropped
+    D[1:, 0, :] = np.inf  # no drops in z in any state
+    D[0, 1:, 0] = np.inf  # no drops in x in state 0, i.e. state where x is matched
+    D[0, 1:, 1] = np.cumsum(drop_costs)  # drop costs initialization in state 1
+
+    # initialize path tracking info for each state
+    P = np.zeros([K + 1, N + 1, 2, 3], dtype=int)  # the last dimension records the previous location and state (zi, xi, prev_state)
+    for xi in range(1, N + 1):
+        P[0, xi, 1] = 0, xi - 1, 1
+    # filling in the dynamic tables
+    for zi in range(1, K + 1):
+        for xi in range(1, N + 1):
+            # define frequently met neighbors here
+            diag_neigh_states = [0, 1]
+            diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states]
+            diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states]
+
+            left_neigh_states = [0, 1]
+            left_neigh_coords = [(zi, xi - 1) for _ in left_neigh_states]
+            left_neigh_costs = [D[zi, xi - 1, s] for s in left_neigh_states]
+
+            left_pos_neigh_states = [0] if contiguous else left_neigh_states
+            left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states]
+            left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states]  # with contiguous=True, a drop between the clips matched to one step is not allowed (one step cannot cover sparse clips)
+
+            top_pos_neigh_states = [0]
+            top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states]
+            top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states]
+
+            z_cost_ind, x_cost_ind = zi - 1, xi - 1  # indexing in costs is shifted by 1
+
+            # state 0: matching x to z
+            neigh_states_pos = diag_neigh_states
+            neigh_coords_pos = diag_neigh_coords
+            neigh_costs_pos = diag_neigh_costs
+            if not one_to_one:
+                neigh_states_pos = neigh_states_pos + left_pos_neigh_states
+                neigh_coords_pos = neigh_coords_pos + left_pos_neigh_coords
+                neigh_costs_pos = neigh_costs_pos + left_pos_neigh_costs
+            if not exclusive:  # exclusive=True means any clip can be matched with only one step, i.e., a path from the top is not allowed
+                neigh_states_pos = neigh_states_pos + top_pos_neigh_states
+                neigh_coords_pos = neigh_coords_pos + top_pos_neigh_coords
+                neigh_costs_pos = neigh_costs_pos + top_pos_neigh_costs
+
+            costs_pos = np.array(neigh_costs_pos) + zx_costs[z_cost_ind, x_cost_ind]  # calculate cumulative cost in current step
+            opt_ind_pos = np.argmin(costs_pos)
+            P[zi, xi, 0] = *neigh_coords_pos[opt_ind_pos], neigh_states_pos[opt_ind_pos]  # records the previous position (zi, xi) and state (0 or 1)
+            D[zi, xi, 0] = costs_pos[opt_ind_pos]  # update the minimal cumulative cost of the selected path
+
+            # state 1: x is dropped
+            costs_neg = np.array(left_neigh_costs) + drop_costs[x_cost_ind]
+            opt_ind_neg = np.argmin(costs_neg)
+            P[zi, xi, 1] = *left_neigh_coords[opt_ind_neg], left_neigh_states[opt_ind_neg]
+            D[zi, xi, 1] = costs_neg[opt_ind_neg]
+
+    cur_state = D[K, N, :].argmin()
+    min_cost = D[K, N, cur_state]
+
+    # backtracking the solution
+    zi, xi = K, N
+    path, labels = [], np.zeros(N)
+    x_dropped = [N] if cur_state == 1 else []
+    while not (zi == 0 and xi == 0):
+        path.append((zi, xi))
+        zi_prev, xi_prev, prev_state = P[zi, xi, cur_state]
+        if xi > 0:
+            labels[xi - 1] = zi * (cur_state == 0)  # either zi or 0
+        if prev_state == 1:
+            x_dropped.append(xi_prev)
+        zi, xi, cur_state = zi_prev, xi_prev, prev_state
+
+    if not return_labels:
+        return min_cost, D, path, x_dropped
+    else:
+        return labels
+
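Continuing the toy instance from above, the expected labeling (1-indexed step ids, 0 meaning dropped):

```python
labels = drop_dtw(zx, drop, return_labels=True)
print(labels)   # [0. 1. 1. 0.], with a minimum cost of 1.3
```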
+def double_drop_dtw(
+    pairwise_zx_costs,
+    x_drop_costs,
+    z_drop_costs,
+    contiguous=True,
+    one_to_many=True,
+    many_to_one=True,
+    return_labels=False,
+):
+    """Drop-DTW algorithm that allows drops from both sequences. See Algorithm 1 in Appendix.
+
+    Parameters
+    ----------
+    pairwise_zx_costs: np.ndarray [K, N]
+        pairwise match costs between K steps and N video clips
+    x_drop_costs: np.ndarray [N]
+        drop costs for each clip
+    z_drop_costs: np.ndarray [K]
+        drop costs for each step
+    contiguous: bool
+        if True, can only match a contiguous sequence of clips to a step
+        (i.e. no drops in between the clips)
+    """
+    K, N = pairwise_zx_costs.shape
+
+    # initialize solution matrices
+    D = np.zeros([K + 1, N + 1, 4])  # the 4 dimensions are the following states: zx, z-, -x, --
+    # no drops allowed in zx DP. Setting the same for all DPs to change later here.
+    D[1:, 0, :] = 99999999
+    D[0, 1:, :] = 99999999
+    D[0, 0, 1:] = 99999999
+    # Allow to drop x in z- and --
+    D[0, 1:, 1], D[0, 1:, 3] = np.cumsum(x_drop_costs), np.cumsum(x_drop_costs)
+    # Allow to drop z in -x and --
+    D[1:, 0, 2], D[1:, 0, 3] = np.cumsum(z_drop_costs), np.cumsum(z_drop_costs)
+
+    # initialize path tracking info for each of the 4 DP tables:
+    P = np.zeros([K + 1, N + 1, 4, 3], dtype=int)  # (zi, xi, prev_state)
+    for zi in range(1, K + 1):
+        P[zi, 0, 2], P[zi, 0, 3] = (zi - 1, 0, 2), (zi - 1, 0, 3)
+    for xi in range(1, N + 1):
+        P[0, xi, 1], P[0, xi, 3] = (0, xi - 1, 1), (0, xi - 1, 3)
+
+    # filling in the dynamic tables
+    for zi in range(1, K + 1):
+        for xi in range(1, N + 1):
+            # define frequently met neighbors here
+            diag_neigh_states = [0, 1, 2, 3]  # zx, z-, -x, --
+            diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states]
+            diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states]
+
+            left_pos_neigh_states = [0, 1]  # zx and z-
+            left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states]
+            left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states]
+
+            top_pos_neigh_states = [0, 2]  # zx and -x
+            top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states]
+            top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states]
+
+            left_neg_neigh_states = [2, 3]  # -x and --
+            left_neg_neigh_coords = [(zi, xi - 1) for _ in left_neg_neigh_states]
+            left_neg_neigh_costs = [D[zi, xi - 1, s] for s in left_neg_neigh_states]
+
+            top_neg_neigh_states = [1, 3]  # z- and --
+            top_neg_neigh_coords = [(zi - 1, xi) for _ in top_neg_neigh_states]
+            top_neg_neigh_costs = [D[zi - 1, xi, s] for s in top_neg_neigh_states]
+
+            z_cost_ind, x_cost_ind = zi - 1, xi - 1  # indexing in costs is shifted by 1
+
+            # DP 0: coming to zx
+            neigh_states_zx = diag_neigh_states
+            neigh_coords_zx = diag_neigh_coords
+            neigh_costs_zx = diag_neigh_costs
+            if one_to_many:
+                if contiguous:
+                    neigh_states_zx.extend(left_pos_neigh_states[0:1])
+                    neigh_coords_zx.extend(left_pos_neigh_coords[0:1])
+                    neigh_costs_zx.extend(left_pos_neigh_costs[0:1])
+                else:
+                    neigh_states_zx.extend(left_pos_neigh_states)
+                    neigh_coords_zx.extend(left_pos_neigh_coords)
+                    neigh_costs_zx.extend(left_pos_neigh_costs)
+            if many_to_one:
+                neigh_states_zx.extend(top_pos_neigh_states)
+                neigh_coords_zx.extend(top_pos_neigh_coords)
+                neigh_costs_zx.extend(top_pos_neigh_costs)
+
+            costs_zx = np.array(neigh_costs_zx) + pairwise_zx_costs[z_cost_ind, x_cost_ind]
+            opt_ind_zx = np.argmin(costs_zx)
+            P[zi, xi, 0] = *neigh_coords_zx[opt_ind_zx], neigh_states_zx[opt_ind_zx]
+            D[zi, xi, 0] = costs_zx[opt_ind_zx]
+
+            # DP 1: coming to z-
+            neigh_states_z_ = left_pos_neigh_states
+            neigh_coords_z_ = left_pos_neigh_coords
+            neigh_costs_z_ = left_pos_neigh_costs
+            costs_z_ = np.array(neigh_costs_z_) + x_drop_costs[x_cost_ind]
+            opt_ind_z_ = np.argmin(costs_z_)
+            P[zi, xi, 1] =
*neigh_coords_z_[opt_ind_z_], neigh_states_z_[opt_ind_z_] + D[zi, xi, 1] = costs_z_[opt_ind_z_] + + # DP 2: coming to -x + neigh_states__x = top_pos_neigh_states + neigh_coords__x = top_pos_neigh_coords + neigh_costs__x = top_pos_neigh_costs + costs__x = np.array(neigh_costs__x) + z_drop_costs[z_cost_ind] + opt_ind__x = np.argmin(costs__x) + P[zi, xi, 2] = *neigh_coords__x[opt_ind__x], neigh_states__x[opt_ind__x] + D[zi, xi, 2] = costs__x[opt_ind__x] + + # DP 3: coming to -- + neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states) + # neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states + diag_neigh_states) + # adding negative left and top neighbors + neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords) + # neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords + diag_neigh_coords) + costs___ = np.concatenate( + [ + left_neg_neigh_costs + x_drop_costs[x_cost_ind], + top_neg_neigh_costs + z_drop_costs[z_cost_ind], + # diag_neigh_costs + z_drop_costs[z_cost_ind] + x_drop_costs[x_cost_ind], + ], + 0, + ) + + opt_ind___ = costs___.argmin() + P[zi, xi, 3] = *neigh_coords___[opt_ind___], neigh_states___[opt_ind___] + D[zi, xi, 3] = costs___[opt_ind___] + + cur_state = D[K, N, :].argmin() + min_cost = D[K, N, cur_state] + + # unroll path + path = [] + zi, xi = K, N + x_dropped = [N] if cur_state in [1, 3] else [] + z_dropped = [K] if cur_state in [2, 3] else [] + while not (zi == 0 and xi == 0): + path.append((zi, xi)) + zi_prev, xi_prev, prev_state = P[zi, xi, cur_state] + if prev_state in [1, 3]: + x_dropped.append(xi_prev) + if prev_state in [2, 3]: + z_dropped.append(zi_prev) + zi, xi, cur_state = zi_prev, xi_prev, prev_state + + if return_labels: + labels = np.zeros(N) + for zi, xi in path: + if zi not in z_dropped and xi not in x_dropped: + labels[xi - 1] = zi + return labels + else: + return min_cost, path, x_dropped, z_dropped + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_left_pos, coord_left_neg = coord_left[..., [0, 1], :], coord_left[..., [2, 3], :] + coord_up_pos, coord_up_neg = coord_up[..., [0, 2], :], coord_up[..., [1, 3], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag] + coordinates_zx = [coord_diag] + if one_to_many: + neighbors_zx.append(neigh_left_pos[..., [0]] if contiguous else neigh_left) + coordinates_zx.append(coord_left_pos[..., [0], :] if contiguous else coord_left) + if many_to_one: + neighbors_zx.append(neigh_up_pos) + coordinates_zx.append(coord_up_pos) + diag_zx = list_min(neighbors_zx) + match_costs_diag + path_zx = list_min(coordinates_zx, keys=neighbors_zx) + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + coordinates_z_ = [coord_left_pos] + diag_z_ = list_min(neighbors_z_) + x_drop_costs_diag + path_z_ = list_min(coordinates_z_, keys=neighbors_z_) + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + coordinates__x = [coord_up_pos] + diag__x = list_min(neighbors__x) + z_drop_costs_diag + path__x = list_min(coordinates__x, keys=neighbors__x) + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], 
neigh_up_neg + z_drop_costs_diag[..., None]] + coordinates___ = [coord_left_neg, coord_up_neg] + diag___ = list_min(neighbors___) + path___ = list_min(coordinates___, neighbors___) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + path = torch.stack([path_zx, path_z_, path__x, path___], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
       In this NW variant, however, each cell keeps a single value (the best of
       match / drop-x / drop-z), so this dimension has size 1.
+    """
+    # initialize the first two anti-diagonals
+    batch_inf = torch.stack([inf] * B, 0)
+    diag_pp = torch.zeros([B, 1, 1], device=dev)  # diag at i-2
+    x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]]
+    diag_p_row = x1_dropcost[..., None]
+    diag_p_col = z1_dropcost[..., None]
+    diag_p = torch.cat([diag_p_row, diag_p_col], 1)  # diag at i-1
+
+    # The path table is also in diagonal representation; it carries, for every point,
+    # the backpointer [d, r, s] of its best predecessor
+    path_pp = torch.zeros([B, 1, 1, 3], device=dev, dtype=int)
+    path_p = torch.zeros([B, 2, 1, 3], device=dev, dtype=int)
+    all_paths = [path_pp, path_p]  # stores all the intermediate path diagonals for the backtrack
+
+    # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point
+    # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is the element's order in the diagonal
+    # and s is the state (re-coded below to the incoming direction: 0 diag, 1 left, 2 up)
+    coord_pp = get_diag_coord_grid(B, 1, 1, 0).to(dev)
+    coord_p = get_diag_coord_grid(B, 2, 1, 1).to(dev)
+
+    min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev)  # for storing the solution for each element
+    tracebacks = [None for _ in range(B)]  # stores the recovered alignment for each batch element
+
+    for d in range(K + N - 1):
+        size = diag_p.size(1) - 1
+        pp_start = 0 if d < N else 1
+        neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)]
+
+        coord_up, coord_left, coord_diag = (
+            coord_p[:, :-1].clone(),
+            coord_p[:, 1:].clone(),
+            coord_pp[:, pp_start : (pp_start + size)].clone(),
+        )
+        # assign the right state to coordinates
+        coord_diag[..., 2] = 0
+        coord_left[..., 2] = 1
+        coord_up[..., 2] = 2
+
+        # define match and drop cost vectors
+        match_costs_diag = torch.stack(
+            [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0
+        )
+
+        x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1
+        x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1])
+        z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1
+        z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end]
+
+        # update the table -> compute new diagonal
+
+        # single DP state: best of match (diag), x-drop (left) and z-drop (up)
+        neighbors = [
+            neigh_diag + match_costs_diag[..., None],
+            neigh_left + x_drop_costs_diag[..., None],
+            neigh_up + z_drop_costs_diag[..., None],
+        ]
+        coordinates = [coord_diag, coord_left, coord_up]
+        diag = list_min(neighbors)[..., None]
+        path = (list_min(coordinates, keys=neighbors))[..., None, :]
+
+        # add the initialization values on the ends of the diagonal if needed
+        effective_d = d + 2  # effective count of d is actually d + 2, since we started with 2 diagonals
+        if d < N - 1:
+            # fill in 0th row of the cost matrix with the cumulative x-drop cost
+            x_drop_cost = all_cum_x_drop_costs[:, [d + 1]]
+            cost_pad = x_drop_cost[..., None]
+            diag = torch.cat([cost_pad, diag], dim=1)
+
+            # fill in 0th row of the path matrix with the right pointers
+            left_pointer = torch.stack(
+                [torch.ones(1) * (effective_d - 1), torch.zeros(1), torch.ones(1) * 1], dim=-1
+            )  # [1, 3]
+            left_pointer = (
+                left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype)
+            )  # [B, 1, 1, 3]
+            path = torch.cat([left_pointer, path], 1)
+        if d < K - 1:
+            # fill in 0th col of the cost matrix with the cumulative z-drop cost
+            z_drop_cost = all_cum_z_drop_costs[:, [d + 1]]
+            pad =
z_drop_cost[..., None] + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(1) * (effective_d - 1), torch.ones(1) * (last_r_p - 1), torch.ones(1) * 2], + dim=-1, + ) # [1, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 1, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 1, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + dc, rc, _ = coord_p[b, r][0] + tracebacks[orig_b.item()] = nw_diag_traceback(dc, rc, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_drop_dtw_machine(zx_costs_list, x_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. 
The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + x1_dropcost = all_cum_x_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 2, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 2, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 2, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 2, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_up_pos, coord_left_pos = coord_up[..., [0], :], coord_left[..., [0], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + pos_neighbors = [neigh_diag] + pos_coordinates = [coord_diag] + if one_to_many: + pos_neighbors.append(neigh_left_pos if contiguous else neigh_left) + pos_coordinates.append(coord_left_pos if contiguous else coord_left) + if many_to_one: + pos_neighbors.append(neigh_up) + pos_coordinates.append(coord_up) + diag_pos = list_min(pos_neighbors) + match_costs_diag + path_pos = list_min(pos_coordinates, keys=pos_neighbors) + + neg_neighbors = [neigh_left] + neg_coordinates = [coord_left] + diag_neg = list_min(neg_neighbors) + x_drop_costs_diag + path_neg = list_min(neg_coordinates, keys=neg_neighbors) + + diag = torch.stack([diag_pos, diag_neg], -1) + path = torch.stack([path_pos, path_neg], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(2) 
* (effective_d - 1), torch.zeros(2), torch.arange(2)], dim=-1 + ) # [2, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 2, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + pad = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(2) * (effective_d - 1), torch.ones(2) * (last_r_p - 1), torch.arange(2)], + dim=-1, + ) # [2, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 2, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_cum_x_drop_costs, batch_inf = [ + t[~mask] for t in [all_x_drop_costs, all_cum_x_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def fast_batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + + # create routing masks for selection + # 4x3 corresponds to 4 states (zx, z-, -x, --) and 3 neighbors (l, d, u) + zx_mask = torch.zeros((4, 3)) + zx_mask[:, 1] = 1 + if one_to_many: + zx_mask[0, 0] = 1 + if not contiguous: + zx_mask[1, 0] = 1 + if many_to_one: + zx_mask[[0, 2], 2] = 1 + + z__mask = torch.zeros((4, 3)) + z__mask[[0, 1], 0] = 1 + + _x_mask = torch.zeros((4, 3)) + _x_mask[[0, 2], 2] = 1 + + ___mask = torch.zeros((4, 3)) + ___mask[[2, 3], 0] = 1 + ___mask[[1, 3], 2] = 1 + + mask = torch.stack([zx_mask, z__mask, _x_mask, ___mask], dim=-1).to(dev).to(dtype) # [4, 3, 4] + + def transition( + neigh_left, neigh_diag, neigh_up, coord_left, coord_diag, coord_up, match_costs, x_drop_costs, z_drop_costs + ): + all_neigh = torch.stack([neigh_left, neigh_diag, neigh_up], dim=-1) # [B, d, 4, 3] + all_coords = torch.stack([coord_left, coord_diag, coord_up], dim=-1).permute( + [0, 1, 3, 2, 4] + ) # [B, d, 3, 4, 3], the first 3 is the spatial dimension of coordinates + additions_zx = match_costs[..., None].repeat([1, 1, 3]) # [B, d, 3] + additions_z_ = x_drop_costs[..., None].repeat([1, 1, 3]) + additions__x = z_drop_costs[..., None].repeat([1, 1, 3]) + additions___ = torch.stack([x_drop_costs, match_costs, z_drop_costs], dim=-1) + additions = torch.stack([additions_zx, additions_z_, additions__x, additions___], dim=-1) # [B, d, 3, 4] + + inverse_mask = (~(mask[None, None, ...].to(bool))).to(dtype) + filtered_costs = all_neigh[..., None] * mask[None, None, ...] + inverse_mask * inf[0] # [B, d, 4, 3, 4] + full_costs = filtered_costs + additions[:, :, None, :, :] * mask[None, None, ...] 
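+        # Shape bookkeeping for the masked-min trick above: all_neigh is
+        # [B, d, 4, 3] (4 source states x 3 neighbors: left, diag, up) and
+        # mask is [4, 3, 4] (source state, neighbor, target state), so
+        # full_costs is [B, d, 4, 3, 4]; illegal (source, neighbor) -> target
+        # transitions were overwritten with inf via inverse_mask. The reshape
+        # and min below therefore pick, for every target state, the cheapest
+        # legal predecessor among the 12 (source, neighbor) combinations in a
+        # single vectorized op instead of four separate list_min calls.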
+ B, d = full_costs.shape[:2] + the_min = full_costs.reshape([B, d, -1, 4]).min(dim=2) + new_diag = the_min.values + + all_coords = all_coords[..., None].repeat([1, 1, 1, 1, 1, 4]).reshape([B, d, 3, -1, 4]) + argmins = the_min.indices[:, :, None, None, :].repeat([1, 1, 3, 1, 1]) + pointers = torch.gather(all_coords, index=argmins, dim=-2) + pointers = pointers[:, :, :, 0, :].permute([0, 1, 3, 2]) + return new_diag, pointers + + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + diag, path = transition( + neigh_left, + neigh_diag, + neigh_up, + coord_left, + coord_diag, + coord_up, + match_costs_diag, + x_drop_costs_diag, + z_drop_costs_diag, + ) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = 
torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + local_mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(local_mask, as_tuple=False)[:, 0], Rs[local_mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~local_mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~local_mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~local_mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +if __name__ == '__main__': + zx_costs = np.random.rand(3, 4) # K=3 steps, N=4 clips + # zx_costs = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 1.0]]) + drop_costs = np.random.rand(4) + align = drop_dtw(zx_costs, drop_costs) + #breakpoint() diff --git a/anet_clip/backup/pdvc/dp/soft_dp.py b/anet_clip/backup/pdvc/dp/soft_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..9d5c17e5d5eeff50254dc7b8d31f6d43b253e388 --- /dev/null +++ b/anet_clip/backup/pdvc/dp/soft_dp.py @@ -0,0 +1,617 @@ +import numpy as np +import torch +import math +from torch import log, exp +import torch.nn.functional as F +from copy import copy + +from pdvc.dp.dp_utils import VarTable, minGamma, minProb, pad_costs, prob_min, unique_softmax, cosine_sim + + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def softDTW( + step_features, + frame_features, + labels, + dist_type="inner", + softning="prob", + gamma_min=0.1, + gamma_xz=0.1, + step_normalize=True, +): + """function to obtain a soft (differentiable) version of DTW + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the function + _min_fn = minProb if softning == "prob" else minGamma + min_fn = lambda x: _min_fn(x, gamma=gamma_min) + + # first get a pairwise distance matrix + if 
dist_type == "inner": + dist = step_features @ frame_features.T + else: + dist = cosine_sim(step_features, frame_features) + if step_normalize: + if labels is not None: + norm_dist = unique_softmax(dist, labels, gamma_xz) + else: + norm_dist = torch.softmax(dist / gamma_xz, 0) + dist = -log(norm_dist) + + # initialize soft-DTW table + nrows, ncols = dist.shape + # sdtw = torch.zeros((nrows+1,ncols+1)).to(torch.float).to(device) + sdtw = VarTable((nrows + 1, ncols + 1)) + for i in range(1, nrows + 1): + sdtw[i, 0] = 9999999999 + for j in range(1, ncols + 1): + sdtw[0, j] = 9999999999 + + # obtain dtw table using min_gamma or softMin relaxation + for i in range(1, nrows + 1): + for j in range(1, ncols + 1): + neighbors = torch.stack([sdtw[i, j - 1], sdtw[i - 1, j - 1], sdtw[i - 1, j]]) + di, dj = i - 1, j - 1 # in the distance matrix indices are shifted by one + new_val = dist[di, dj] + min_fn(neighbors) + sdtw[i, j] = torch.squeeze(new_val, 0) + sdtw_loss = sdtw[nrows, ncols] / step_features.shape[0] + return sdtw_loss, sdtw, dist + + +def dropDTW(zx_costs, drop_costs, softning="prob", exclusive=True, contiguous=True, gamma_min=1): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + K, N = zx_costs.shape + exclusive = exclusive if K <= N else False + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3)) # This corresponds to B 3-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + cum_drop_costs[xi - 1] + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e. 
dropping xi + Dm = d_left + drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_cost = D[K, N][0] + return min_cost, D + + +def batch_dropDTW( + zx_costs_list, drop_costs_list, softning="prob", exclusive=True, contiguous=True, drop_mode="DropDTW", gamma_min=1 +): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + + # pre-processing + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # preparing padded tables + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], [] + for i in range(B): + zx_costs = zx_costs_list[i] + drop_costs = drop_costs_list[i] + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # padding everything to the size of the largest N and K + row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device) + padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad])) + padded_drop_costs.append(torch.cat([drop_costs, row_pad])) + multirow_pad = torch.stack([row_pad + inf] * Ks[i], dim=0) + padded_table = torch.cat([zx_costs, multirow_pad], dim=1) + rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + inf + padded_table = torch.cat([padded_table, rest_pad], dim=0) + padded_zx_costs.append(padded_table) + + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3, B)) # This corresponds to B 3-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + if drop_mode == "DropDTW": + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + elif drop_mode == "OTAM": + D[0, xi] = torch.zeros_like(D[0, xi]) + else: # drop_mode == 'DTW' + D[0, xi] = torch.zeros_like(D[0, xi]) + inf + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + if drop_mode == "DropDTW": + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e. 
dropping xi + Dm = d_left + all_drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + else: + d_right = D[zi - 1, xi][0:1] + D_final = Dm = Dp = ( + min_fn([d_diag, d_left, d_right], gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + ) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + Ni, Ki = Ns[i], Ks[i] + min_cost_i = D[Ki, Ni][0, i] + min_costs.append(min_cost_i / Ni) + + return min_costs, D + + +def batch_double_dropDTW(zx_costs_list, drop_costs_list, gamma_min=1): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + min_fn = lambda x: minProb(x, gamma=gamma_min) + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + + # assuming sequences are the same length + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 4, B), dtype, dev) # This corresponds to B 4-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + all_cum_drop_costs[(zi - 1) : zi] + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1, 2, 3] # zx, z-, -x, -- + diag_neigh_costs = [D[zi - 1, xi - 1][s] for s in diag_neigh_states] + + left_neigh_states = [0, 1] # zx and z- + left_neigh_costs = [D[zi, xi - 1][s] for s in left_neigh_states] + + upper_neigh_states = [0, 2] # zx and -x + upper_neigh_costs = [D[zi - 1, xi][s] for s in upper_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + # DP 0: coming to zx + neigh_costs_zx = diag_neigh_costs + upper_neigh_costs + left_neigh_costs + D0 = min_fn(neigh_costs_zx) + all_zx_costs[z_cost_ind, x_cost_ind] + + # DP 1: coming to z- + neigh_costs_z_ = left_neigh_costs + D1 = min_fn(neigh_costs_z_) + all_drop_costs[x_cost_ind] + + # DP 2: coming to -x + neigh_costs__x = upper_neigh_costs + D2 = min_fn(neigh_costs__x) + all_drop_costs[z_cost_ind] + + # DP 3: coming to -- + costs___ = [d + all_drop_costs[z_cost_ind] * 2 for d in diag_neigh_costs] + [ + D[zi, xi - 1][3] + all_drop_costs[x_cost_ind], + D[zi - 1, xi][3] + all_drop_costs[z_cost_ind], + ] + D3 = min_fn(costs___) + + D[zi, xi] = torch.cat([D0, D1, D2, D3], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + min_cost_i = min_fn(D[K, N][:, i]) + min_costs.append(min_cost_i / N) + return min_costs, D + + +def drop_dtw_machine(zx_costs, drop_costs, gamma_min=1, exclusive=True, contiguous=True): + K, N = zx_costs.shape + dev = zx_costs.device + flipped_costs = torch.flip(zx_costs, [0]) # flip the cost matrix upside down + cum_drop_costs = torch.cumsum(drop_costs, dim=-1) + + # initialize first two contr diagonals + inf = torch.tensor([9999999999], device=dev, dtype=zx_costs.dtype) + diag_pp = torch.zeros([1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([1, 2], 
device=dev) * inf + diag_p_row = torch.stack([inf, cum_drop_costs[[0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 0) # diag at i-1 + + for i in range(K + N - 1): + size = diag_p.size(0) - 1 + pp_start = max(0, diag_pp.size(0) - diag_p.size(0)) + neigh_up, neigh_left, neigh_diag = diag_p[:-1], diag_p[1:], diag_pp[pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[:, [0]], neigh_left[:, [0]] + + # define match and drop cost vectors + match_costs_diag = torch.flip(torch.diag(flipped_costs, i + 1 - K), [-1]) + d_start, d_end = max(1 - K + i, 0), min(i, N - 1) + 1 + drop_costs_diag = torch.flip(drop_costs[d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + if not exclusive: + pos_neighbors.append(neigh_up_pos) + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if i < N - 1: + # fill in 0th row with [drop_cost, inf] + pad = torch.stack([inf, cum_drop_costs[[i + 1]]], -1) + diag = torch.cat([pad, diag]) + if i < K - 1: + # fill in 0th col with [inf, inf] + pad = torch.stack([inf, inf], -1) + diag = torch.cat([diag, pad]) + + diag_pp = diag_p + diag_p = diag + assert (diag.size(0) == 1) and (diag.size(1) == 2), f"Last diag shape is {diag.shape} instead of [1, 2]" + + cost = prob_min(diag, gamma_min) + return cost + + +def batch_drop_dtw_machine(zx_costs_list, drop_costs_list, gamma_min=1, exclusive=True, contiguous=True): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + # For samples where K > N, exclusive computation is not possible + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + persample_exclusive = torch.tensor([Ni >= Ki for Ki, Ni in shapes]).to(dev) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # define costs in tensors + all_zx_costs = [F.pad(c, [0, N - c.shape[1], 0, K - c.shape[0]]) for c in zx_costs_list] + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0) + all_cum_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. Here, 0 is keep, 1 is drop. 
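+    Iterating over anti-diagonals lets the whole batch advance in lock-step:
+    the DP table is filled in K + N - 1 wavefront steps, and every cell on a
+    diagonal only needs the two previous diagonals (diag_p and diag_pp).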
+ """ + # initialize first two contr diagonals + batch_inf, batch_ones = torch.stack([inf] * B, 0), torch.ones([B, 1], device=dev, dtype=dtype) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([B, 1, 2], device=dev) * batch_inf[..., None] + diag_p_row = torch.stack([batch_inf, all_cum_drop_costs[:, [0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The pathlength path is also a diagonal representation that carries the optimal pathlength to each point + with torch.no_grad(): + path_pp = torch.zeros([B, 1, 2], device=dev, dtype=dtype) + path_p = torch.ones([B, 2, 2], device=dev, dtype=dtype) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) + path_lens = torch.zeros(B).to(dtype=dtype).to(device=dev) + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + neigh_path_up, neigh_path_left, neigh_path_diag = ( + path_p[:, :-1], + path_p[:, 1:], + path_pp[:, pp_start : (pp_start + size)], + ) + neigh_path_up_pos, neigh_path_left_pos = neigh_path_up[..., [0]], neigh_path_left[..., [0]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + d_start, d_end = max(1 - K + d, 0), min(d, N - 1) + 1 + drop_costs_diag = torch.flip(all_drop_costs[:, d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + pos_path_neighbors = ( + [neigh_path_diag, neigh_path_left_pos] if contiguous else [neigh_path_diag, neigh_path_left] + ) + if exclusive and (~persample_exclusive).any(): + # apply non-exclusive rule for some batch elements, via masing out the exclusive elements with inf + masked_neigh_up_pos = neigh_up_pos + persample_exclusive[:, None, None] * batch_inf[:, None] + pos_neighbors.append(masked_neigh_up_pos) + + pos_path_neighbors.append(neigh_path_up_pos * (~persample_exclusive[:, None, None])) + elif not exclusive: + # apply standard non-exclusive rule to all batch elements + pos_neighbors.append(neigh_up_pos) + pos_path_neighbors.append(neigh_path_up_pos) + + # DP Table update + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # Path Table Update + with torch.no_grad(): + path_pos = prob_min(pos_path_neighbors, gamma_min, pos_neighbors) + 1 + path_neg = prob_min([neigh_path_left], gamma_min, [neigh_left]) + 1 + path = torch.stack([path_pos, path_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in DP table's 0th row with [drop_cost, inf] + pad_d = torch.stack([batch_inf, all_cum_drop_costs[:, [d + 1]]], -1) + diag = torch.cat([pad_d, diag], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = torch.stack([batch_inf, torch.zeros_like(batch_inf) + d], -1) + path = torch.cat([pad_p, path], 1) + + if d < K - 1: + # fill in DP table's 0th col with [inf, inf] + pad_d = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad_d], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = pad_d + path = torch.cat([path, pad_p], 1) + + diag_pp = diag_p + diag_p 
= diag + + path_pp = path_p + path_p = path + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + path_lens[orig_mask] = path_lens[orig_mask] + prob_min([path[bs, rs]], gamma_min, [diag[bs, rs]]) + + diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs] + ] + all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive = [ + t[~mask] for t in [all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive] + ] + if torch.numel(Ds) == 0: + break + + # costs = prob_min([diag], gamma_min) + costs_norm = min_costs / path_lens + return min_costs, path_lens + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, gamma_min=1, exclusive=True, contiguous=True +): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + Ns, Ks = [], [] + for i in range(B): + Ki, Ni = zx_costs_list[i].shape + if exclusive and Ki >= Ni: + # in case the number of steps is greater than the number of frames, + # duplicate every frame and let the drops do the job. + mult = math.ceil(Ki / Ni) + zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1]) + x_drop_costs_list[i] = torch.stack([x_drop_costs_list[i]] * mult, dim=-1).reshape([-1]) + Ni *= mult + Ns.append(Ni) + Ks.append(Ki) + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag, neigh_left_pos[..., [0]]] if contiguous else [neigh_diag, neigh_left_pos] + if not exclusive: + neighbors_zx.append(neigh_up_pos) + diag_zx = prob_min(neighbors_zx, gamma_min) + match_costs_diag + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + diag_z_ = prob_min(neighbors_z_, gamma_min) + x_drop_costs_diag + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + diag__x = prob_min(neighbors__x, gamma_min) + z_drop_costs_diag + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], neigh_up_neg + z_drop_costs_diag[..., None]] + diag___ = prob_min(neighbors___, gamma_min) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in 0th row with [drop_cost, inf] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([pad, diag], 1) + if d < K - 1: + # fill in 0th col with [inf, inf] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], 1) + + diag_pp = diag_p + diag_p = diag + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + + # filtering out already processed elements + diag, diag_p, diag_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs = [ + t[~mask] for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs] + ] + + if torch.numel(Ds) == 0: + break + + costs_norm = min_costs / torch.tensor(Ns).to(dev) + return costs_norm + + 
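+# A minimal usage sketch for the soft machines above (illustrative only): it
+# builds Drop-DTW costs from a step-by-frame similarity matrix the same way
+# DTWMatcher in pdvc/matcher.py does, i.e. similarities are shifted by a
+# percentile "dropline" so the drop costs can be set to zero. The helper name
+# `make_drop_dtw_costs` and the 0.3 percentile are assumptions for this demo,
+# not part of the original API.
+def make_drop_dtw_costs(sim, keep_percentile=0.3):
+    """sim: [K, N] step-to-frame similarities; returns (zx_costs, drop_costs)."""
+    k = max(1, int(torch.numel(sim) * keep_percentile))
+    dropline = torch.topk(sim.reshape([-1]), k).values[-1]
+    zx_costs = dropline - sim  # cheap to match where similarity is high
+    drop_costs = torch.zeros(sim.size(1))  # frame drops are free after the shift
+    return zx_costs, drop_costs
+
+
+# e.g. with a random 5-step, 12-frame problem:
+#   sim = torch.normal(torch.zeros(5, 12))
+#   zx, drops = make_drop_dtw_costs(sim)
+#   cost = drop_dtw_machine(zx, drops + 1e-3, gamma_min=0.1)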
+if __name__ == "__main__": + from exact_dp import double_drop_dtw + + K, N = 7, 15 + zx_costs = torch.normal(torch.ones([K, N])) + x_drop_costs = zx_costs.mean(0) + z_drop_costs = zx_costs.mean(1) + + min_cost, *_ = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy()) + my_costs = batch_double_drop_dtw_machine([zx_costs], [x_drop_costs], [z_drop_costs], gamma_min=0) + print(my_costs * N, min_cost) diff --git a/anet_clip/backup/pdvc/dp/visualization.py b/anet_clip/backup/pdvc/dp/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..fed631a8979532253408fb402672eee0cc4a9a64 --- /dev/null +++ b/anet_clip/backup/pdvc/dp/visualization.py @@ -0,0 +1,179 @@ +import io +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.pyplot import figure +from PIL import Image + + +# defining the colors and shapes +color_code = [ + "blue", + "orange", + "green", + "red", + "purple", + "brown", + "pink", + "grey", + "olive", + "cyan", + "lime", + "grey", + "firebrick", + "coral", + "chocolate", + "saddlebrown", + "bisque", + "goldenrod", + "gold", + "khaki", + "darkolivegreen", + "greenyellow", + "palegreen", + "springgreen", + "aquamarine", + "teal", + "deepskyblue", + "navy", + "mediumslateblue", + "royalblue", + "indigo", + "magenta", + "deeppink", + "crimson", + "violet", + "snow", + "lightgrey", + "wheat", + "dodgerblue", + "darkseagreen", +] +color_code = color_code * 10 +shape_code = ["o", "s", "P", "*", "h", ">", "X", "d", "D", "v", "<", "p"] +shape_code = shape_code * int(len(color_code) / len(shape_code) + 1) + +color_values = [] +for color in color_code: + _ = plt.fill([0, 0, 1, 1, 0], [0, 1, 1, 0, 0], color) + buf = io.BytesIO() + _ = plt.savefig(buf, format="png") + _ = plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + color_values.append(img[100, 300]) + +color_code_hex = [] +for color_value in color_values: + step_color_rgb = tuple([s.item() for s in color_value]) + color_code_hex.append("#%02x%02x%02x" % step_color_rgb) + + +def plot_alignment( + step_ids, frame_labels, step_colors, step_shapes, size=(15, 2), name="all_step_to_video", to_np=True, grid_on=True +): + N_steps = len(frame_labels) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + tick_freq = 50 if N_steps > 1500 else 20 + _ = plt.xticks(np.arange(0, N_steps, tick_freq)) + _ = plt.xlim(0, N_steps) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + + if grid_on: + _ = plt.grid() + else: + plt.plot(np.arange(len(frame_labels)), [1] * len(frame_labels), color="grey") + + for si, step_id in enumerate(step_ids): + time, val = [], [] + for i in range(N_steps): + if si + 1 == frame_labels[i]: + time.append(i) + val.append(1) + time, val = np.array(time), np.array(val) + _ = plt.plot(time, val, step_shapes[step_id], color=step_colors[step_id]) + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_step_to_video_alignment(corresp_mat, size=(15, 2)): + """corresp_mat is of shape [K, N], where K is num_steps, and N is video_len""" + step_ids = np.arange(corresp_mat.size(0)) + 1 + labels = corresp_mat.to(float).argmax(0) + 1 * corresp_mat.to(bool).any(0) + + K_present = corresp_mat.to(bool).any(1).to(int).sum().item() + name = f"Video Segmentation | {K_present} steps present" + return plot_alignment(step_ids, 
labels, color_code, shape_code, name=name, size=size) + + +def plot_similarities( + sim, + drop_line=None, + colors=None, + select=None, + color_offset=0, + do_legend=True, + name="", + size=(15, 2), + grid_on=True, + to_np=True, + linewidth=1, +): + colors = colors if colors is not None else color_code + K, N = sim.shape + select = select if select is not None else np.arange(K) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + _ = plt.xticks(np.arange(0, N, 20)) + _ = plt.xlim(0, N) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + if grid_on: + _ = plt.grid() + + for i in range(K): + if i in select: + _ = plt.plot(np.arange(N), sim[i], color=colors[i + color_offset], label=str(i), linewidth=linewidth) + + if drop_line is not None: + _ = plt.plot(np.arange(N), drop_line * np.ones(N), "--") + + if do_legend: + _ = plt.xlim(0, N + int(0.10 * N)) + plt.legend() + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_gt_seg(N, starts, ends, colors=None, shapes=None, name="GT Seg", clip_len=1, size=(15, 2), grid_on=True): + colors = colors if colors is not None else color_code + shapes = shapes if shapes is not None else shape_code + + K = len(starts) + labels = -np.ones(N) + for i in range(K): + s, e = int(starts[i]), int(ends[i]) + labels[s : e + 1] = i + step_ids = np.arange(K) + return plot_alignment(step_ids, labels, colors, shapes, to_np=False, name=name, size=size, grid_on=grid_on) diff --git a/anet_clip/backup/pdvc/matcher.py b/anet_clip/backup/pdvc/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..3311680756df6cf1efeed2bbe2ab55350525b4ce --- /dev/null +++ b/anet_clip/backup/pdvc/matcher.py @@ -0,0 +1,446 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +from scipy.optimize import linear_sum_assignment +from torch import nn +import torch.nn.functional as F +from torch import log, exp +import numpy as np + +from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from pdvc.dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from pdvc.dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from pdvc.dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class HungarianMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + + For efficiency reasons, the targets don't include the no_object. 
Because of this, in general, + there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, + while the others are un-matched (and thus treated as non-objects). + """ + + def __init__(self, + cost_class: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0 # or cost_caption!=0, "all costs cant be 0" + # breakpoint() + + def forward(self, outputs, targets, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + # Also concat the target labels and boxes + tgt_ids = torch.cat([v["labels"] for v in targets]) + if self.use_pseudo_box and self.training: + # print('use pseudo box') + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + else: + tgt_bbox = torch.cat([v["boxes"] for v in targets]) + # print('use gt box') + + # Compute the classification cost. 
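+            # This is the focal-loss form of the cost (as in Deformable DETR):
+            # for each (query, target) pair we take the focal positive term at
+            # the target class minus the focal negative term, so queries that
+            # are confidently correct receive a low (negative) matching cost.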
+ alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + + # Compute the giou cost between boxes + try: + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + except Exception: + print('out_bbox', out_bbox) + print('tgt_bbox', tgt_bbox) + raise + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix; each term has shape [num_queries, num_targets] + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2]} + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + sizes = [len(v["boxes_pseudo"]) for v in targets] if self.use_pseudo_box else [len(v["boxes"]) for v in targets] + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + # Relaxed matching: tile each target m2o_rate times so that up to m2o_rate queries + # can be assigned to the same target, then fold the column indices back with j % size. + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]] * m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j % sizes[ii], dtype=torch.int64)) for ii, (i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + if verbose: + print('------matching results:') + print(indices) + for indice in indices: + for i, j in zip(*indice): + print(out_bbox[i][0::2], tgt_bbox[j][0::2]) + print('-----topK scores:') + topk_indices = out_prob.topk(10, dim=0) + print(topk_indices) + for i, (v, ids) in enumerate(zip(*topk_indices)): + print('top {}'.format(i)) + s = '' + for name, cost in costs.items(): + s += name + ':{} '.format(cost[ids]) + print(s) + + return indices, rl_indices + +class DTWMatcher(nn.Module): + ''' + drop_z: if True, drop along both the x axis (queries) and the z axis (text) + one_to_many: multiple x may match to one z + many_to_one: multiple z may match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=True, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, outputs, targets, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed[0].device + # embarrassingly, this is faster on CPU than on GPU!
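+ # Rough picture of the cost construction below: pairwise text/query similarities are + # shifted by a per-video "dropline" so that well-matched pairs get negative cost while + # drop costs stay at 0; with keep_percentile = p, the dropline is the ceil(p * numel)-th + # largest similarity, so roughly a fraction p of all pairs can be matched at negative cost.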
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + # TODO: Add the classification cost the the alignment cost + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, [] + +class SimMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + based on the similarity bewteen text embedding and query embedding + """ + def __init__(self, + cost_class: float = 1, + cost_sim: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_sim = cost_sim + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + 
self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_sim!=0, "all costs cannot be 0" + # breakpoint() + + def forward(self, outputs, targets, text_embed, event_embed, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + tgt_ids = torch.cat([v["labels"] for v in targets]) + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Also concat the target labels and boxes + # breakpoint() + if self.use_pseudo_box: + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + + # Compute the giou cost betwen boxes + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + else: + cost_bbox = torch.zeros_like(cost_class) + cost_giou = torch.zeros_like(cost_class) + + # Compute the classification cost. 
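+ # Note: this recomputes the same focal-style class cost as above; the distinctive + # term in SimMatcher is cost_sim = 1 - cosine(text_embed, event_embed), computed + # below, which makes queries whose embedding aligns with a caption cheap to match.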
+ # alpha = 0.25 + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + # breakpoint() + # Compute the similarity cost + cost_sim = compute_sim(text_embed, event_embed, l2_norm=True).permute(1,0) + cost_sim = torch.ones_like(cost_sim) - cost_sim + # breakpoint() + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + self.cost_sim * cost_sim + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + 'cost_sim': cost_sim, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2], + } + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + print(self.cost_sim, cost_sim.var(dim=0), cost_sim.max(dim=0)[0] - cost_sim.min(dim=0)[0]) + # print(self.cost_caption, cost_caption.var(dim=0), cost_caption.max(dim=0)[0] - cost_caption.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + sizes = [text_embed.size(0)] + # pdb.set_trace() + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]]*m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j%sizes[ii], dtype=torch.int64)) for ii,(i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + + return indices, rl_indices + +def build_matcher(args): + if args.matcher_type == 'DTW': + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + elif args.matcher_type == 'Sim': + return SimMatcher(cost_class=args.set_cost_class, + cost_sim=args.set_cost_sim, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + else: + return HungarianMatcher(cost_class=args.set_cost_class, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + + +def build_matcher_simple(): + #return DTWMatcher(keep_percentile=0.5) + return SimMatcher() + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() \ No newline at end of file diff --git a/anet_clip/backup/pdvc/matcher_align.py b/anet_clip/backup/pdvc/matcher_align.py new file mode 100644 index 0000000000000000000000000000000000000000..e9b93dce7e9ff252230fbb8f8bc2861ce3a16605 --- /dev/null +++ b/anet_clip/backup/pdvc/matcher_align.py @@ -0,0 +1,154 @@ +# 
------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +import torch.nn.functional as F +import numpy as np +from torch import nn +from scipy.optimize import linear_sum_assignment +# from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class DTWMatcher(nn.Module): + ''' + drop_z: if True, drop along both the x axis (queries) and the z axis (text) + one_to_many: multiple x may match to one z + many_to_one: multiple z may match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=False, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed.device + # embarrassingly, this is faster on CPU than on GPU!
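+ # The drop-DTW / Needleman-Wunsch machines invoked below are sequential dynamic + # programs, which is presumably why CPU beats GPU here; only the correspondence + # matrices are moved back to orig_device afterwards.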
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, _ + +def build_matcher(args): + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + + +def build_matcher_simple(): + return DTWMatcher(keep_percentile=0.5) + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() diff --git a/anet_clip/backup/pdvc/modules/UniVL_mini.py b/anet_clip/backup/pdvc/modules/UniVL_mini.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9d6e960cc742b2eed92827f568734ae91073ce --- /dev/null +++ 
b/anet_clip/backup/pdvc/modules/UniVL_mini.py @@ -0,0 +1,1292 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import copy +import math +import logging +import collections +import unicodedata +import os +from urllib.parse import urlparse +from typing import Optional, Tuple, Union, IO, Callable, Set +from pathlib import Path +import shutil +import tempfile +import json +from hashlib import sha256 +from functools import wraps +import boto3 +from botocore.exceptions import ClientError +import requests +from tqdm import tqdm + + +import torch +from torch import nn + + +logger = logging.getLogger(__name__) + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz", +} + +CONFIG_NAME = 'bert_config.json' +WEIGHTS_NAME = 'pytorch_model.bin' + +PRETRAINED_VOCAB_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", +} +PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = { + 'base-uncased': 512, + 'large-uncased': 512, + 'base-cased': 512, + 'large-cased': 512, + 'base-multilingual-uncased': 512, + 'base-multilingual-cased': 512, + 'base-chinese': 512, +} +VOCAB_NAME = 'vocab.txt' + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with 
open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. + """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. 
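+ # The temp file lives outside the cache directory, so an interrupted download never + # leaves a partial entry in the cache; the flush() and seek(0) below matter because + # shutil.copyfileobj copies from the current file position and the tail of the + # download may still sit in an in-memory buffer.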
+ with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a peice of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")): + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + if token not in self.vocab: + ids.append(self.vocab["[UNK]"]) + logger.error("Cannot find token '{}' in vocab. 
Using [UNK] insetad".format(token)) + else: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + raise ValueError( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids in tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + @classmethod + def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedBertModel from a pre-trained model file. + Download and cache the pre-trained model file if needed. + """ + vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(vocab_file) is False: + if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP: + vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name] + else: + vocab_file = pretrained_model_name + if os.path.isdir(vocab_file): + vocab_file = os.path.join(vocab_file, VOCAB_NAME) + # redirect to the cache, if necessary + print(vocab_file) + try: + resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir) + except FileNotFoundError: + logger.error( + "Model name '{}' was not found. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + vocab_file)) + return None + if resolved_vocab_file == vocab_file: + logger.info("loading vocabulary file {}".format(vocab_file)) + else: + logger.info("loading vocabulary file {} from cache at {}".format( + vocab_file, resolved_vocab_file)) + if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP: + # if we're using a pretrained model, ensure the tokenizer wont index sequences longer + # than the number of positional embeddings + max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name] + kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len) + kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]") + + # Instantiate tokenizer. + tokenizer = cls(resolved_vocab_file, *inputs, **kwargs) + + return tokenizer + + def add_tokens(self, new_tokens, model): + """ + Add a list of new tokens to the tokenizer class. If the new tokens are not in the + vocabulary, they are added to it with indices starting from length of the current vocabulary. + Args: + new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assign the index of the ``unk_token`` to them). + Returns: + Number of tokens added to the vocabulary. + Examples:: + # Let's see how to increase the vocabulary of Bert model and tokenizer + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) + print('We have added', num_added_toks, 'tokens') + model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. 
+ """ + + to_add_tokens = [] + for token in new_tokens: + assert isinstance(token, str) + to_add_tokens.append(token) + # logger.info("Adding %s to the vocabulary", token) + + vocab = collections.OrderedDict() + for token in self.vocab.keys(): + vocab[token] = self.vocab[token] + for token in to_add_tokens: + vocab[token] = len(vocab) + self.vocab = self.wordpiece_tokenizer.vocab = vocab + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + + model.resize_token_embeddings(new_num_tokens=len(vocab)) + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. + """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False + + +def gelu(x): + """Implementation of the gelu activation function. 
+ For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weight doesn't exsits. 
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + +class BertConfig(PretrainedConfig): + """Configuration class to store the configuration of a `BertModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. 
+ """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. + """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + # if not isinstance(config, PretrainedConfig): + # raise ValueError( + # "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + # "To create a model from a Google pretrained model use " + # "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + # self.__class__.__name__, self.__class__.__name__ + # )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in 
module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). + """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated to the first character of the + input (`CLF`) to train on the Next-Sentence task (see BERT's paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + def __init__(self, config): + super(BertModel, self).__init__(config) + self.config = config + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output + + +def build_UniVL_text_encoder(dict): + bert_config = BertConfig.from_dict(dict) + bert = BertModel(bert_config) + + return bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + + + +def load_pretrained_UniVL(args, device, n_gpu, local_rank, init_model=None): + + if init_model: + model_state_dict = torch.load(init_model, map_location='cpu') + else: + model_state_dict = None + + # Prepare model + cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + + return model + +if __name__ == '__main__': + bert_config_dict = { + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 + } + tokenizer = build_UniVL_tokenizer() + bert = build_UniVL_text_encoder(bert_config_dict) + words = ["[CLS]"] + ['you', 'love', 'you'] + ["[SEP]"] + #input_ids = tokenizer.convert_tokens_to_ids(words) + #masked_tokens = words.copy() + #masked_token_ids = tokenizer.convert_tokens_to_ids(masked_tokens) + token_type_ids = None + breakpoint() + encoded_layers, _ = bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True) + sequence_output = encoded_layers[-1] + diff --git a/anet_clip/backup/pdvc/modules/__init__.py b/anet_clip/backup/pdvc/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d34e0a02cf990fffc878b695beee9637074e33d0 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..874dd9210e523da3f66f8b15054a96cadeee908f Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2030617dd39a30551bcda930768bb5af198af31 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc differ diff --git 
a/anet_clip/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3218f01ae734e108885fd322ff9db4dc73b204fe Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0585085b54395651e7fa6b8fb60d877f579733ce Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f39ca45e9cc3f91242f4e039001dc5f6f2636af Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93a08af0acd6f720e525203d381fe91f1bc3b33f Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6631deadc8c18f93e755eca7dd975b6ce83b6ca1 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5148122f4202468a675012274b2eead3a84a1510 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff40d3cabb4bad221fc02eb703a9b76971c01709 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc b/anet_clip/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64204d60a2c8da0639a86e05802791c7a65e4c17 Binary files /dev/null and b/anet_clip/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/modules/beam.py b/anet_clip/backup/pdvc/modules/beam.py new file mode 100644 index 0000000000000000000000000000000000000000..eff1d961ef393e03a3c9105022b1047f5ea7133d --- /dev/null +++ b/anet_clip/backup/pdvc/modules/beam.py @@ -0,0 +1,116 @@ +""" +Manage beam search info structure. +Heavily borrowed from OpenNMT-py. 
+For code in OpenNMT-py, please check the following link (maybe in oldest version): +https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py +""" + +import torch + +class Constants(): + def __init__(self): + self.PAD = 0 + self.UNK = 1 + self.BOS = 2 + self.EOS = 3 + self.PAD_WORD = '[PAD]' + self.UNK_WORD = '[UNK]' + self.BOS_WORD = '[CLS]' + self.EOS_WORD = '[SEP]' + + @classmethod + def from_tokenizer(cls, tokenizer): + instance = cls() + instance.PAD = tokenizer.vocab[instance.PAD_WORD] + instance.UNK = tokenizer.vocab[instance.UNK_WORD] + instance.BOS = tokenizer.vocab[instance.BOS_WORD] + instance.EOS = tokenizer.vocab[instance.EOS_WORD] + return instance + +class Beam(): + ''' Beam search ''' + + def __init__(self, size, device=False, tokenizer=None): + if tokenizer is None: + self.constants = Constants() + else: + self.constants = Constants.from_tokenizer(tokenizer) + + self.size = size + self._done = False + # The score for each interface on the beam. + self.scores = torch.zeros((size,), dtype=torch.float, device=device) + self.all_scores = [] + + # The backpointers at each time-step. + self.prev_ks = [] + + # The outputs at each time-step. + self.next_ys = [torch.full((size,), self.constants.BOS, dtype=torch.long, device=device)] + + def get_current_state(self): + "Get the outputs for the current timestep." + return self.get_tentative_hypothesis() + + def get_current_origin(self): + "Get the backpointers for the current timestep." + return self.prev_ks[-1] + + @property + def done(self): + return self._done + + def advance(self, word_prob, word_length=None): + + "Update beam status and check if finished or not." + num_words = word_prob.size(1) + # Sum the previous scores. + if len(self.prev_ks) > 0: + beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) + else: + beam_lk = word_prob[0] + flat_beam_lk = beam_lk.view(-1) + best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) # 1st sort + self.all_scores.append(self.scores) + self.scores = best_scores + # bestScoresId is flattened as a (beam x word) array, + # so we need to calculate which word and beam each score came from + prev_k = best_scores_id // num_words + self.prev_ks.append(prev_k) + self.next_ys.append(best_scores_id - prev_k * num_words) + # End condition is when top-of-beam is EOS. + if self.next_ys[-1][0].item() == self.constants.EOS: + self._done = True + + return self._done + + def sort_scores(self): + "Sort the scores." + return torch.sort(self.scores, 0, True) + + def get_the_best_score_and_idx(self): + "Get the score of the best in the beam." + scores, ids = self.sort_scores() + return scores[1], ids[1] + + def get_tentative_hypothesis(self): + "Get the decoded sequence for the current timestep." + + if len(self.next_ys) == 1: + dec_seq = self.next_ys[0].unsqueeze(1) + else: + _, keys = self.sort_scores() + hyps = [self.get_hypothesis(k) for k in keys] + hyps = [[self.constants.BOS] + h for h in hyps] + dec_seq = torch.LongTensor(hyps) + + return dec_seq + + def get_hypothesis(self, k): + """ Walk back to construct the full hypothesis. 
""" + hyp = [] + for j in range(len(self.prev_ks) - 1, -1, -1): + hyp.append(self.next_ys[j+1][k]) + k = self.prev_ks[j][k] + + return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/anet_clip/backup/pdvc/modules/bert-base-uncased/bert_config.json b/anet_clip/backup/pdvc/modules/bert-base-uncased/bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fca794a5f07ff8f963fe8b61e3694b0fb7f955df --- /dev/null +++ b/anet_clip/backup/pdvc/modules/bert-base-uncased/bert_config.json @@ -0,0 +1,13 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 +} diff --git a/anet_clip/backup/pdvc/modules/bert-base-uncased/vocab.txt b/anet_clip/backup/pdvc/modules/bert-base-uncased/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb140275c155a9c7c5a3b3e0e77a9e839594a938 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/bert-base-uncased/vocab.txt @@ -0,0 +1,30522 @@ +[PAD] +[unused0] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[unused11] +[unused12] +[unused13] +[unused14] +[unused15] +[unused16] +[unused17] +[unused18] +[unused19] +[unused20] +[unused21] +[unused22] +[unused23] +[unused24] +[unused25] +[unused26] +[unused27] +[unused28] +[unused29] +[unused30] +[unused31] +[unused32] +[unused33] +[unused34] +[unused35] +[unused36] +[unused37] +[unused38] +[unused39] +[unused40] +[unused41] +[unused42] +[unused43] +[unused44] +[unused45] +[unused46] +[unused47] +[unused48] +[unused49] +[unused50] +[unused51] +[unused52] +[unused53] +[unused54] +[unused55] +[unused56] +[unused57] +[unused58] +[unused59] +[unused60] +[unused61] +[unused62] +[unused63] +[unused64] +[unused65] +[unused66] +[unused67] +[unused68] +[unused69] +[unused70] +[unused71] +[unused72] +[unused73] +[unused74] +[unused75] +[unused76] +[unused77] +[unused78] +[unused79] +[unused80] +[unused81] +[unused82] +[unused83] +[unused84] +[unused85] +[unused86] +[unused87] +[unused88] +[unused89] +[unused90] +[unused91] +[unused92] +[unused93] +[unused94] +[unused95] +[unused96] +[unused97] +[unused98] +[UNK] +[CLS] +[SEP] +[MASK] +[unused99] +[unused100] +[unused101] +[unused102] +[unused103] +[unused104] +[unused105] +[unused106] +[unused107] +[unused108] +[unused109] +[unused110] +[unused111] +[unused112] +[unused113] +[unused114] +[unused115] +[unused116] +[unused117] +[unused118] +[unused119] +[unused120] +[unused121] +[unused122] +[unused123] +[unused124] +[unused125] +[unused126] +[unused127] +[unused128] +[unused129] +[unused130] +[unused131] +[unused132] +[unused133] +[unused134] +[unused135] +[unused136] +[unused137] +[unused138] +[unused139] +[unused140] +[unused141] +[unused142] +[unused143] +[unused144] +[unused145] +[unused146] +[unused147] +[unused148] +[unused149] +[unused150] +[unused151] +[unused152] +[unused153] +[unused154] +[unused155] +[unused156] +[unused157] +[unused158] +[unused159] +[unused160] +[unused161] +[unused162] +[unused163] +[unused164] +[unused165] +[unused166] +[unused167] +[unused168] +[unused169] +[unused170] +[unused171] +[unused172] +[unused173] +[unused174] +[unused175] +[unused176] +[unused177] +[unused178] +[unused179] +[unused180] +[unused181] +[unused182] +[unused183] +[unused184] +[unused185] +[unused186] 
+[unused187] +[unused188] +[unused189] +[unused190] +[unused191] +[unused192] +[unused193] +[unused194] +[unused195] +[unused196] +[unused197] +[unused198] +[unused199] +[unused200] +[unused201] +[unused202] +[unused203] +[unused204] +[unused205] +[unused206] +[unused207] +[unused208] +[unused209] +[unused210] +[unused211] +[unused212] +[unused213] +[unused214] +[unused215] +[unused216] +[unused217] +[unused218] +[unused219] +[unused220] +[unused221] +[unused222] +[unused223] +[unused224] +[unused225] +[unused226] +[unused227] +[unused228] +[unused229] +[unused230] +[unused231] +[unused232] +[unused233] +[unused234] +[unused235] +[unused236] +[unused237] +[unused238] +[unused239] +[unused240] +[unused241] +[unused242] +[unused243] +[unused244] +[unused245] +[unused246] +[unused247] +[unused248] +[unused249] +[unused250] +[unused251] +[unused252] +[unused253] +[unused254] +[unused255] +[unused256] +[unused257] +[unused258] +[unused259] +[unused260] +[unused261] +[unused262] +[unused263] +[unused264] +[unused265] +[unused266] +[unused267] +[unused268] +[unused269] +[unused270] +[unused271] +[unused272] +[unused273] +[unused274] +[unused275] +[unused276] +[unused277] +[unused278] +[unused279] +[unused280] +[unused281] +[unused282] +[unused283] +[unused284] +[unused285] +[unused286] +[unused287] +[unused288] +[unused289] +[unused290] +[unused291] +[unused292] +[unused293] +[unused294] +[unused295] +[unused296] +[unused297] +[unused298] +[unused299] +[unused300] +[unused301] +[unused302] +[unused303] +[unused304] +[unused305] +[unused306] +[unused307] +[unused308] +[unused309] +[unused310] +[unused311] +[unused312] +[unused313] +[unused314] +[unused315] +[unused316] +[unused317] +[unused318] +[unused319] +[unused320] +[unused321] +[unused322] +[unused323] +[unused324] +[unused325] +[unused326] +[unused327] +[unused328] +[unused329] +[unused330] +[unused331] +[unused332] +[unused333] +[unused334] +[unused335] +[unused336] +[unused337] +[unused338] +[unused339] +[unused340] +[unused341] +[unused342] +[unused343] +[unused344] +[unused345] +[unused346] +[unused347] +[unused348] +[unused349] +[unused350] +[unused351] +[unused352] +[unused353] +[unused354] +[unused355] +[unused356] +[unused357] +[unused358] +[unused359] +[unused360] +[unused361] +[unused362] +[unused363] +[unused364] +[unused365] +[unused366] +[unused367] +[unused368] +[unused369] +[unused370] +[unused371] +[unused372] +[unused373] +[unused374] +[unused375] +[unused376] +[unused377] +[unused378] +[unused379] +[unused380] +[unused381] +[unused382] +[unused383] +[unused384] +[unused385] +[unused386] +[unused387] +[unused388] +[unused389] +[unused390] +[unused391] +[unused392] +[unused393] +[unused394] +[unused395] +[unused396] +[unused397] +[unused398] +[unused399] +[unused400] +[unused401] +[unused402] +[unused403] +[unused404] +[unused405] +[unused406] +[unused407] +[unused408] +[unused409] +[unused410] +[unused411] +[unused412] +[unused413] +[unused414] +[unused415] +[unused416] +[unused417] +[unused418] +[unused419] +[unused420] +[unused421] +[unused422] +[unused423] +[unused424] +[unused425] +[unused426] +[unused427] +[unused428] +[unused429] +[unused430] +[unused431] +[unused432] +[unused433] +[unused434] +[unused435] +[unused436] +[unused437] +[unused438] +[unused439] +[unused440] +[unused441] +[unused442] +[unused443] +[unused444] +[unused445] +[unused446] +[unused447] +[unused448] +[unused449] +[unused450] +[unused451] +[unused452] +[unused453] +[unused454] +[unused455] +[unused456] +[unused457] +[unused458] +[unused459] 
+[unused460] +[unused461] +[unused462] +[unused463] +[unused464] +[unused465] +[unused466] +[unused467] +[unused468] +[unused469] +[unused470] +[unused471] +[unused472] +[unused473] +[unused474] +[unused475] +[unused476] +[unused477] +[unused478] +[unused479] +[unused480] +[unused481] +[unused482] +[unused483] +[unused484] +[unused485] +[unused486] +[unused487] +[unused488] +[unused489] +[unused490] +[unused491] +[unused492] +[unused493] +[unused494] +[unused495] +[unused496] +[unused497] +[unused498] +[unused499] +[unused500] +[unused501] +[unused502] +[unused503] +[unused504] +[unused505] +[unused506] +[unused507] +[unused508] +[unused509] +[unused510] +[unused511] +[unused512] +[unused513] +[unused514] +[unused515] +[unused516] +[unused517] +[unused518] +[unused519] +[unused520] +[unused521] +[unused522] +[unused523] +[unused524] +[unused525] +[unused526] +[unused527] +[unused528] +[unused529] +[unused530] +[unused531] +[unused532] +[unused533] +[unused534] +[unused535] +[unused536] +[unused537] +[unused538] +[unused539] +[unused540] +[unused541] +[unused542] +[unused543] +[unused544] +[unused545] +[unused546] +[unused547] +[unused548] +[unused549] +[unused550] +[unused551] +[unused552] +[unused553] +[unused554] +[unused555] +[unused556] +[unused557] +[unused558] +[unused559] +[unused560] +[unused561] +[unused562] +[unused563] +[unused564] +[unused565] +[unused566] +[unused567] +[unused568] +[unused569] +[unused570] +[unused571] +[unused572] +[unused573] +[unused574] +[unused575] +[unused576] +[unused577] +[unused578] +[unused579] +[unused580] +[unused581] +[unused582] +[unused583] +[unused584] +[unused585] +[unused586] +[unused587] +[unused588] +[unused589] +[unused590] +[unused591] +[unused592] +[unused593] +[unused594] +[unused595] +[unused596] +[unused597] +[unused598] +[unused599] +[unused600] +[unused601] +[unused602] +[unused603] +[unused604] +[unused605] +[unused606] +[unused607] +[unused608] +[unused609] +[unused610] +[unused611] +[unused612] +[unused613] +[unused614] +[unused615] +[unused616] +[unused617] +[unused618] +[unused619] +[unused620] +[unused621] +[unused622] +[unused623] +[unused624] +[unused625] +[unused626] +[unused627] +[unused628] +[unused629] +[unused630] +[unused631] +[unused632] +[unused633] +[unused634] +[unused635] +[unused636] +[unused637] +[unused638] +[unused639] +[unused640] +[unused641] +[unused642] +[unused643] +[unused644] +[unused645] +[unused646] +[unused647] +[unused648] +[unused649] +[unused650] +[unused651] +[unused652] +[unused653] +[unused654] +[unused655] +[unused656] +[unused657] +[unused658] +[unused659] +[unused660] +[unused661] +[unused662] +[unused663] +[unused664] +[unused665] +[unused666] +[unused667] +[unused668] +[unused669] +[unused670] +[unused671] +[unused672] +[unused673] +[unused674] +[unused675] +[unused676] +[unused677] +[unused678] +[unused679] +[unused680] +[unused681] +[unused682] +[unused683] +[unused684] +[unused685] +[unused686] +[unused687] +[unused688] +[unused689] +[unused690] +[unused691] +[unused692] +[unused693] +[unused694] +[unused695] +[unused696] +[unused697] +[unused698] +[unused699] +[unused700] +[unused701] +[unused702] +[unused703] +[unused704] +[unused705] +[unused706] +[unused707] +[unused708] +[unused709] +[unused710] +[unused711] +[unused712] +[unused713] +[unused714] +[unused715] +[unused716] +[unused717] +[unused718] +[unused719] +[unused720] +[unused721] +[unused722] +[unused723] +[unused724] +[unused725] +[unused726] +[unused727] +[unused728] +[unused729] +[unused730] +[unused731] +[unused732] 
+[unused733] +[unused734] +[unused735] +[unused736] +[unused737] +[unused738] +[unused739] +[unused740] +[unused741] +[unused742] +[unused743] +[unused744] +[unused745] +[unused746] +[unused747] +[unused748] +[unused749] +[unused750] +[unused751] +[unused752] +[unused753] +[unused754] +[unused755] +[unused756] +[unused757] +[unused758] +[unused759] +[unused760] +[unused761] +[unused762] +[unused763] +[unused764] +[unused765] +[unused766] +[unused767] +[unused768] +[unused769] +[unused770] +[unused771] +[unused772] +[unused773] +[unused774] +[unused775] +[unused776] +[unused777] +[unused778] +[unused779] +[unused780] +[unused781] +[unused782] +[unused783] +[unused784] +[unused785] +[unused786] +[unused787] +[unused788] +[unused789] +[unused790] +[unused791] +[unused792] +[unused793] +[unused794] +[unused795] +[unused796] +[unused797] +[unused798] +[unused799] +[unused800] +[unused801] +[unused802] +[unused803] +[unused804] +[unused805] +[unused806] +[unused807] +[unused808] +[unused809] +[unused810] +[unused811] +[unused812] +[unused813] +[unused814] +[unused815] +[unused816] +[unused817] +[unused818] +[unused819] +[unused820] +[unused821] +[unused822] +[unused823] +[unused824] +[unused825] +[unused826] +[unused827] +[unused828] +[unused829] +[unused830] +[unused831] +[unused832] +[unused833] +[unused834] +[unused835] +[unused836] +[unused837] +[unused838] +[unused839] +[unused840] +[unused841] +[unused842] +[unused843] +[unused844] +[unused845] +[unused846] +[unused847] +[unused848] +[unused849] +[unused850] +[unused851] +[unused852] +[unused853] +[unused854] +[unused855] +[unused856] +[unused857] +[unused858] +[unused859] +[unused860] +[unused861] +[unused862] +[unused863] +[unused864] +[unused865] +[unused866] +[unused867] +[unused868] +[unused869] +[unused870] +[unused871] +[unused872] +[unused873] +[unused874] +[unused875] +[unused876] +[unused877] +[unused878] +[unused879] +[unused880] +[unused881] +[unused882] +[unused883] +[unused884] +[unused885] +[unused886] +[unused887] +[unused888] +[unused889] +[unused890] +[unused891] +[unused892] +[unused893] +[unused894] +[unused895] +[unused896] +[unused897] +[unused898] +[unused899] +[unused900] +[unused901] +[unused902] +[unused903] +[unused904] +[unused905] +[unused906] +[unused907] +[unused908] +[unused909] +[unused910] +[unused911] +[unused912] +[unused913] +[unused914] +[unused915] +[unused916] +[unused917] +[unused918] +[unused919] +[unused920] +[unused921] +[unused922] +[unused923] +[unused924] +[unused925] +[unused926] +[unused927] +[unused928] +[unused929] +[unused930] +[unused931] +[unused932] +[unused933] +[unused934] +[unused935] +[unused936] +[unused937] +[unused938] +[unused939] +[unused940] +[unused941] +[unused942] +[unused943] +[unused944] +[unused945] +[unused946] +[unused947] +[unused948] +[unused949] +[unused950] +[unused951] +[unused952] +[unused953] +[unused954] +[unused955] +[unused956] +[unused957] +[unused958] +[unused959] +[unused960] +[unused961] +[unused962] +[unused963] +[unused964] +[unused965] +[unused966] +[unused967] +[unused968] +[unused969] +[unused970] +[unused971] +[unused972] +[unused973] +[unused974] +[unused975] +[unused976] +[unused977] +[unused978] +[unused979] +[unused980] +[unused981] +[unused982] +[unused983] +[unused984] +[unused985] +[unused986] +[unused987] +[unused988] +[unused989] +[unused990] +[unused991] +[unused992] +[unused993] +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? 
+@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¡ +¢ +£ +¤ +¥ +¦ +§ +¨ +© +ª +« +¬ +® +° +± +² +³ +´ +µ +¶ +· +¹ +º +» +¼ +½ +¾ +¿ +× +ß +æ +ð +÷ +ø +þ +đ +ħ +ı +ł +ŋ +œ +ƒ +ɐ +ɑ +ɒ +ɔ +ɕ +ə +ɛ +ɡ +ɣ +ɨ +ɪ +ɫ +ɬ +ɯ +ɲ +ɴ +ɹ +ɾ +ʀ +ʁ +ʂ +ʃ +ʉ +ʊ +ʋ +ʌ +ʎ +ʐ +ʑ +ʒ +ʔ +ʰ +ʲ +ʳ +ʷ +ʸ +ʻ +ʼ +ʾ +ʿ +ˈ +ː +ˡ +ˢ +ˣ +ˤ +α +β +γ +δ +ε +ζ +η +θ +ι +κ +λ +μ +ν +ξ +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +ђ +є +і +ј +љ +њ +ћ +ӏ +ա +բ +գ +դ +ե +թ +ի +լ +կ +հ +մ +յ +ն +ո +պ +ս +վ +տ +ր +ւ +ք +־ +א +ב +ג +ד +ה +ו +ז +ח +ט +י +ך +כ +ל +ם +מ +ן +נ +ס +ע +ף +פ +ץ +צ +ק +ר +ש +ת +، +ء +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +ز +س +ش +ص +ض +ط +ظ +ع +غ +ـ +ف +ق +ك +ل +م +ن +ه +و +ى +ي +ٹ +پ +چ +ک +گ +ں +ھ +ہ +ی +ے +अ +आ +उ +ए +क +ख +ग +च +ज +ट +ड +ण +त +थ +द +ध +न +प +ब +भ +म +य +र +ल +व +श +ष +स +ह +ा +ि +ी +ो +। +॥ +ং +অ +আ +ই +উ +এ +ও +ক +খ +গ +চ +ছ +জ +ট +ড +ণ +ত +থ +দ +ধ +ন +প +ব +ভ +ম +য +র +ল +শ +ষ +স +হ +া +ি +ী +ে +க +ச +ட +த +ந +ன +ப +ம +ய +ர +ல +ள +வ +ா +ி +ு +ே +ை +ನ +ರ +ಾ +ක +ය +ර +ල +ව +ා +ก +ง +ต +ท +น +พ +ม +ย +ร +ล +ว +ส +อ +า +เ +་ +། +ག +ང +ད +ན +པ +བ +མ +འ +ར +ལ +ས +မ +ა +ბ +გ +დ +ე +ვ +თ +ი +კ +ლ +მ +ნ +ო +რ +ს +ტ +უ +ᄀ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄉ +ᄊ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅥ +ᅦ +ᅧ +ᅩ +ᅪ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆼ +ᴬ +ᴮ +ᴰ +ᴵ +ᴺ +ᵀ +ᵃ +ᵇ +ᵈ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵖ +ᵗ +ᵘ +ᵢ +ᵣ +ᵤ +ᵥ +ᶜ +ᶠ +‐ +‑ +‒ +– +— +― +‖ +‘ +’ +‚ +“ +” +„ +† +‡ +• +… +‰ +′ +″ +› +‿ +⁄ +⁰ +ⁱ +⁴ +⁵ +⁶ +⁷ +⁸ +⁹ +⁺ +⁻ +ⁿ +₀ +₁ +₂ +₃ +₄ +₅ +₆ +₇ +₈ +₉ +₊ +₍ +₎ +ₐ +ₑ +ₒ +ₓ +ₕ +ₖ +ₗ +ₘ +ₙ +ₚ +ₛ +ₜ +₤ +₩ +€ +₱ +₹ +ℓ +№ +ℝ +™ +⅓ +⅔ +← +↑ +→ +↓ +↔ +↦ +⇄ +⇌ +⇒ +∂ +∅ +∆ +∇ +∈ +− +∗ +∘ +√ +∞ +∧ +∨ +∩ +∪ +≈ +≡ +≤ +≥ +⊂ +⊆ +⊕ +⊗ +⋅ +─ +│ +■ +▪ +● +★ +☆ +☉ +♠ +♣ +♥ +♦ +♭ +♯ +⟨ +⟩ +ⱼ +⺩ +⺼ +⽥ +、 +。 +〈 +〉 +《 +》 +「 +」 +『 +』 +〜 +あ +い +う +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +や +ゆ +よ +ら +り +る +れ +ろ +を +ん +ァ +ア +ィ +イ +ウ +ェ +エ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ュ +ョ +ラ +リ +ル +レ +ロ +ワ +ン +・ +ー +一 +三 +上 +下 +不 +世 +中 +主 +久 +之 +也 +事 +二 +五 +井 +京 +人 +亻 +仁 +介 +代 +仮 +伊 +会 +佐 +侍 +保 +信 +健 +元 +光 +八 +公 +内 +出 +分 +前 +劉 +力 +加 +勝 +北 +区 +十 +千 +南 +博 +原 +口 +古 +史 +司 +合 +吉 +同 +名 +和 +囗 +四 +国 +國 +土 +地 +坂 +城 +堂 +場 +士 +夏 +外 +大 +天 +太 +夫 +奈 +女 +子 +学 +宀 +宇 +安 +宗 +定 +宣 +宮 +家 +宿 +寺 +將 +小 +尚 +山 +岡 +島 +崎 +川 +州 +巿 +帝 +平 +年 +幸 +广 +弘 +張 +彳 +後 +御 +德 +心 +忄 +志 +忠 +愛 +成 +我 +戦 +戸 +手 +扌 +政 +文 +新 +方 +日 +明 +星 +春 +昭 +智 +曲 +書 +月 +有 +朝 +木 +本 +李 +村 +東 +松 +林 +森 +楊 +樹 +橋 +歌 +止 +正 +武 +比 +氏 +民 +水 +氵 +氷 +永 +江 +沢 +河 +治 +法 +海 +清 +漢 +瀬 +火 +版 +犬 +王 +生 +田 +男 +疒 +発 +白 +的 +皇 +目 +相 +省 +真 +石 +示 +社 +神 +福 +禾 +秀 +秋 +空 +立 +章 +竹 +糹 +美 +義 +耳 +良 +艹 +花 +英 +華 +葉 +藤 +行 +街 +西 +見 +訁 +語 +谷 +貝 +貴 +車 +軍 +辶 +道 +郎 +郡 +部 +都 +里 +野 +金 +鈴 +镇 +長 +門 +間 +阝 +阿 +陳 +陽 +雄 +青 +面 +風 +食 +香 +馬 +高 +龍 +龸 +fi +fl +! +( +) +, +- +. +/ +: +? 
+~ +the +of +and +in +to +was +he +is +as +for +on +with +that +it +his +by +at +from +her +##s +she +you +had +an +were +but +be +this +are +not +my +they +one +which +or +have +him +me +first +all +also +their +has +up +who +out +been +when +after +there +into +new +two +its +##a +time +would +no +what +about +said +we +over +then +other +so +more +##e +can +if +like +back +them +only +some +could +##i +where +just +##ing +during +before +##n +do +##o +made +school +through +than +now +years +most +world +may +between +down +well +three +##d +year +while +will +##ed +##r +##y +later +##t +city +under +around +did +such +being +used +state +people +part +know +against +your +many +second +university +both +national +##er +these +don +known +off +way +until +re +how +even +get +head +... +didn +##ly +team +american +because +de +##l +born +united +film +since +still +long +work +south +us +became +any +high +again +day +family +see +right +man +eyes +house +season +war +states +including +took +life +north +same +each +called +name +much +place +however +go +four +group +another +found +won +area +here +going +10 +away +series +left +home +music +best +make +hand +number +company +several +never +last +john +000 +very +album +take +end +good +too +following +released +game +played +little +began +district +##m +old +want +those +side +held +own +early +county +ll +league +use +west +##u +face +think +##es +2010 +government +##h +march +came +small +general +town +june +##on +line +based +something +##k +september +thought +looked +along +international +2011 +air +july +club +went +january +october +our +august +april +york +12 +few +2012 +2008 +east +show +member +college +2009 +father +public +##us +come +men +five +set +station +church +##c +next +former +november +room +party +located +december +2013 +age +got +2007 +##g +system +let +love +2006 +though +every +2014 +look +song +water +century +without +body +black +night +within +great +women +single +ve +building +large +population +river +named +band +white +started +##an +once +15 +20 +should +18 +2015 +service +top +built +british +open +death +king +moved +local +times +children +february +book +why +11 +door +need +president +order +final +road +wasn +although +due +major +died +village +third +knew +2016 +asked +turned +st +wanted +say +##p +together +received +main +son +served +different +##en +behind +himself +felt +members +power +football +law +voice +play +##in +near +park +history +30 +having +2005 +16 +##man +saw +mother +##al +army +point +front +help +english +street +art +late +hands +games +award +##ia +young +14 +put +published +country +division +across +told +13 +often +ever +french +london +center +six +red +2017 +led +days +include +light +25 +find +tell +among +species +really +according +central +half +2004 +form +original +gave +office +making +enough +lost +full +opened +must +included +live +given +german +player +run +business +woman +community +cup +might +million +land +2000 +court +development +17 +short +round +ii +km +seen +class +story +always +become +sure +research +almost +director +council +la +##2 +career +things +using +island +##z +couldn +car +##is +24 +close +force +##1 +better +free +support +control +field +students +2003 +education +married +##b +nothing +worked +others +record +big +inside +level +anything +continued +give +james +##3 +military +established +non +returned +feel +does +title +written +thing +feet +william +far +co +association +hard +already +2002 +##ra +championship 
+human +western +100 +##na +department +hall +role +various +production +21 +19 +heart +2001 +living +fire +version +##ers +##f +television +royal +##4 +produced +working +act +case +society +region +present +radio +period +looking +least +total +keep +england +wife +program +per +brother +mind +special +22 +##le +am +works +soon +##6 +political +george +services +taken +created +##7 +further +able +reached +david +union +joined +upon +done +important +social +information +either +##ic +##x +appeared +position +ground +lead +rock +dark +election +23 +board +france +hair +course +arms +site +police +girl +instead +real +sound +##v +words +moment +##te +someone +##8 +summer +project +announced +san +less +wrote +past +followed +##5 +blue +founded +al +finally +india +taking +records +america +##ne +1999 +design +considered +northern +god +stop +battle +toward +european +outside +described +track +today +playing +language +28 +call +26 +heard +professional +low +australia +miles +california +win +yet +green +##ie +trying +blood +##ton +southern +science +maybe +everything +match +square +27 +mouth +video +race +recorded +leave +above +##9 +daughter +points +space +1998 +museum +change +middle +common +##0 +move +tv +post +##ta +lake +seven +tried +elected +closed +ten +paul +minister +##th +months +start +chief +return +canada +person +sea +release +similar +modern +brought +rest +hit +formed +mr +##la +1997 +floor +event +doing +thomas +1996 +robert +care +killed +training +star +week +needed +turn +finished +railway +rather +news +health +sent +example +ran +term +michael +coming +currently +yes +forces +despite +gold +areas +50 +stage +fact +29 +dead +says +popular +2018 +originally +germany +probably +developed +result +pulled +friend +stood +money +running +mi +signed +word +songs +child +eventually +met +tour +average +teams +minutes +festival +current +deep +kind +1995 +decided +usually +eastern +seemed +##ness +episode +bed +added +table +indian +private +charles +route +available +idea +throughout +centre +addition +appointed +style +1994 +books +eight +construction +press +mean +wall +friends +remained +schools +study +##ch +##um +institute +oh +chinese +sometimes +events +possible +1992 +australian +type +brown +forward +talk +process +food +debut +seat +performance +committee +features +character +arts +herself +else +lot +strong +russian +range +hours +peter +arm +##da +morning +dr +sold +##ry +quickly +directed +1993 +guitar +china +##w +31 +list +##ma +performed +media +uk +players +smile +##rs +myself +40 +placed +coach +province +towards +wouldn +leading +whole +boy +official +designed +grand +census +##el +europe +attack +japanese +henry +1991 +##re +##os +cross +getting +alone +action +lower +network +wide +washington +japan +1990 +hospital +believe +changed +sister +##ar +hold +gone +sir +hadn +ship +##ka +studies +academy +shot +rights +below +base +bad +involved +kept +largest +##ist +bank +future +especially +beginning +mark +movement +section +female +magazine +plan +professor +lord +longer +##ian +sat +walked +hill +actually +civil +energy +model +families +size +thus +aircraft +completed +includes +data +captain +##or +fight +vocals +featured +richard +bridge +fourth +1989 +officer +stone +hear +##ism +means +medical +groups +management +self +lips +competition +entire +lived +technology +leaving +federal +tournament +bit +passed +hot +independent +awards +kingdom +mary +spent +fine +doesn +reported +##ling +jack +fall +raised +itself +stay +true +studio +1988 
+sports +replaced +paris +systems +saint +leader +theatre +whose +market +capital +parents +spanish +canadian +earth +##ity +cut +degree +writing +bay +christian +awarded +natural +higher +bill +##as +coast +provided +previous +senior +ft +valley +organization +stopped +onto +countries +parts +conference +queen +security +interest +saying +allowed +master +earlier +phone +matter +smith +winning +try +happened +moving +campaign +los +##ley +breath +nearly +mid +1987 +certain +girls +date +italian +african +standing +fell +artist +##ted +shows +deal +mine +industry +1986 +##ng +everyone +republic +provide +collection +library +student +##ville +primary +owned +older +via +heavy +1st +makes +##able +attention +anyone +africa +##ri +stated +length +ended +fingers +command +staff +skin +foreign +opening +governor +okay +medal +kill +sun +cover +job +1985 +introduced +chest +hell +feeling +##ies +success +meet +reason +standard +meeting +novel +1984 +trade +source +buildings +##land +rose +guy +goal +##ur +chapter +native +husband +previously +unit +limited +entered +weeks +producer +operations +mountain +takes +covered +forced +related +roman +complete +successful +key +texas +cold +##ya +channel +1980 +traditional +films +dance +clear +approximately +500 +nine +van +prince +question +active +tracks +ireland +regional +silver +author +personal +sense +operation +##ine +economic +1983 +holding +twenty +isbn +additional +speed +hour +edition +regular +historic +places +whom +shook +movie +km² +secretary +prior +report +chicago +read +foundation +view +engine +scored +1982 +units +ask +airport +property +ready +immediately +lady +month +listed +contract +##de +manager +themselves +lines +##ki +navy +writer +meant +##ts +runs +##ro +practice +championships +singer +glass +commission +required +forest +starting +culture +generally +giving +access +attended +test +couple +stand +catholic +martin +caught +executive +##less +eye +##ey +thinking +chair +quite +shoulder +1979 +hope +decision +plays +defeated +municipality +whether +structure +offered +slowly +pain +ice +direction +##ion +paper +mission +1981 +mostly +200 +noted +individual +managed +nature +lives +plant +##ha +helped +except +studied +computer +figure +relationship +issue +significant +loss +die +smiled +gun +ago +highest +1972 +##am +male +bring +goals +mexico +problem +distance +commercial +completely +location +annual +famous +drive +1976 +neck +1978 +surface +caused +italy +understand +greek +highway +wrong +hotel +comes +appearance +joseph +double +issues +musical +companies +castle +income +review +assembly +bass +initially +parliament +artists +experience +1974 +particular +walk +foot +engineering +talking +window +dropped +##ter +miss +baby +boys +break +1975 +stars +edge +remember +policy +carried +train +stadium +bar +sex +angeles +evidence +##ge +becoming +assistant +soviet +1977 +upper +step +wing +1970 +youth +financial +reach +##ll +actor +numerous +##se +##st +nodded +arrived +##ation +minute +##nt +believed +sorry +complex +beautiful +victory +associated +temple +1968 +1973 +chance +perhaps +metal +##son +1945 +bishop +##et +lee +launched +particularly +tree +le +retired +subject +prize +contains +yeah +theory +empire +##ce +suddenly +waiting +trust +recording +##to +happy +terms +camp +champion +1971 +religious +pass +zealand +names +2nd +port +ancient +tom +corner +represented +watch +legal +anti +justice +cause +watched +brothers +45 +material +changes +simply +response +louis +fast +##ting +answer +60 +historical 
+1969 +stories +straight +create +feature +increased +rate +administration +virginia +el +activities +cultural +overall +winner +programs +basketball +legs +guard +beyond +cast +doctor +mm +flight +results +remains +cost +effect +winter +##ble +larger +islands +problems +chairman +grew +commander +isn +1967 +pay +failed +selected +hurt +fort +box +regiment +majority +journal +35 +edward +plans +##ke +##ni +shown +pretty +irish +characters +directly +scene +likely +operated +allow +spring +##j +junior +matches +looks +mike +houses +fellow +##tion +beach +marriage +##ham +##ive +rules +oil +65 +florida +expected +nearby +congress +sam +peace +recent +iii +wait +subsequently +cell +##do +variety +serving +agreed +please +poor +joe +pacific +attempt +wood +democratic +piece +prime +##ca +rural +mile +touch +appears +township +1964 +1966 +soldiers +##men +##ized +1965 +pennsylvania +closer +fighting +claimed +score +jones +physical +editor +##ous +filled +genus +specific +sitting +super +mom +##va +therefore +supported +status +fear +cases +store +meaning +wales +minor +spain +tower +focus +vice +frank +follow +parish +separate +golden +horse +fifth +remaining +branch +32 +presented +stared +##id +uses +secret +forms +##co +baseball +exactly +##ck +choice +note +discovered +travel +composed +truth +russia +ball +color +kiss +dad +wind +continue +ring +referred +numbers +digital +greater +##ns +metres +slightly +direct +increase +1960 +responsible +crew +rule +trees +troops +##no +broke +goes +individuals +hundred +weight +creek +sleep +memory +defense +provides +ordered +code +value +jewish +windows +1944 +safe +judge +whatever +corps +realized +growing +pre +##ga +cities +alexander +gaze +lies +spread +scott +letter +showed +situation +mayor +transport +watching +workers +extended +##li +expression +normal +##ment +chart +multiple +border +##ba +host +##ner +daily +mrs +walls +piano +##ko +heat +cannot +##ate +earned +products +drama +era +authority +seasons +join +grade +##io +sign +difficult +machine +1963 +territory +mainly +##wood +stations +squadron +1962 +stepped +iron +19th +##led +serve +appear +sky +speak +broken +charge +knowledge +kilometres +removed +ships +article +campus +simple +##ty +pushed +britain +##ve +leaves +recently +cd +soft +boston +latter +easy +acquired +poland +##sa +quality +officers +presence +planned +nations +mass +broadcast +jean +share +image +influence +wild +offer +emperor +electric +reading +headed +ability +promoted +yellow +ministry +1942 +throat +smaller +politician +##by +latin +spoke +cars +williams +males +lack +pop +80 +##ier +acting +seeing +consists +##ti +estate +1961 +pressure +johnson +newspaper +jr +chris +olympics +online +conditions +beat +elements +walking +vote +##field +needs +carolina +text +featuring +global +block +shirt +levels +francisco +purpose +females +et +dutch +duke +ahead +gas +twice +safety +serious +turning +highly +lieutenant +firm +maria +amount +mixed +daniel +proposed +perfect +agreement +affairs +3rd +seconds +contemporary +paid +1943 +prison +save +kitchen +label +administrative +intended +constructed +academic +nice +teacher +races +1956 +formerly +corporation +ben +nation +issued +shut +1958 +drums +housing +victoria +seems +opera +1959 +graduated +function +von +mentioned +picked +build +recognized +shortly +protection +picture +notable +exchange +elections +1980s +loved +percent +racing +fish +elizabeth +garden +volume +hockey +1941 +beside +settled +##ford +1940 +competed +replied +drew +1948 +actress +marine 
+scotland +steel +glanced +farm +steve +1957 +risk +tonight +positive +magic +singles +effects +gray +screen +dog +##ja +residents +bus +sides +none +secondary +literature +polish +destroyed +flying +founder +households +1939 +lay +reserve +usa +gallery +##ler +1946 +industrial +younger +approach +appearances +urban +ones +1950 +finish +avenue +powerful +fully +growth +page +honor +jersey +projects +advanced +revealed +basic +90 +infantry +pair +equipment +visit +33 +evening +search +grant +effort +solo +treatment +buried +republican +primarily +bottom +owner +1970s +israel +gives +jim +dream +bob +remain +spot +70 +notes +produce +champions +contact +ed +soul +accepted +ways +del +##ally +losing +split +price +capacity +basis +trial +questions +##ina +1955 +20th +guess +officially +memorial +naval +initial +##ization +whispered +median +engineer +##ful +sydney +##go +columbia +strength +300 +1952 +tears +senate +00 +card +asian +agent +1947 +software +44 +draw +warm +supposed +com +pro +##il +transferred +leaned +##at +candidate +escape +mountains +asia +potential +activity +entertainment +seem +traffic +jackson +murder +36 +slow +product +orchestra +haven +agency +bbc +taught +website +comedy +unable +storm +planning +albums +rugby +environment +scientific +grabbed +protect +##hi +boat +typically +1954 +1953 +damage +principal +divided +dedicated +mount +ohio +##berg +pick +fought +driver +##der +empty +shoulders +sort +thank +berlin +prominent +account +freedom +necessary +efforts +alex +headquarters +follows +alongside +des +simon +andrew +suggested +operating +learning +steps +1949 +sweet +technical +begin +easily +34 +teeth +speaking +settlement +scale +##sh +renamed +ray +max +enemy +semi +joint +compared +##rd +scottish +leadership +analysis +offers +georgia +pieces +captured +animal +deputy +guest +organized +##lin +tony +combined +method +challenge +1960s +huge +wants +battalion +sons +rise +crime +types +facilities +telling +path +1951 +platform +sit +1990s +##lo +tells +assigned +rich +pull +##ot +commonly +alive +##za +letters +concept +conducted +wearing +happen +bought +becomes +holy +gets +ocean +defeat +languages +purchased +coffee +occurred +titled +##q +declared +applied +sciences +concert +sounds +jazz +brain +##me +painting +fleet +tax +nick +##ius +michigan +count +animals +leaders +episodes +##line +content +##den +birth +##it +clubs +64 +palace +critical +refused +fair +leg +laughed +returning +surrounding +participated +formation +lifted +pointed +connected +rome +medicine +laid +taylor +santa +powers +adam +tall +shared +focused +knowing +yards +entrance +falls +##wa +calling +##ad +sources +chosen +beneath +resources +yard +##ite +nominated +silence +zone +defined +##que +gained +thirty +38 +bodies +moon +##ard +adopted +christmas +widely +register +apart +iran +premier +serves +du +unknown +parties +##les +generation +##ff +continues +quick +fields +brigade +quiet +teaching +clothes +impact +weapons +partner +flat +theater +supreme +1938 +37 +relations +##tor +plants +suffered +1936 +wilson +kids +begins +##age +1918 +seats +armed +internet +models +worth +laws +400 +communities +classes +background +knows +thanks +quarter +reaching +humans +carry +killing +format +kong +hong +setting +75 +architecture +disease +railroad +inc +possibly +wish +arthur +thoughts +harry +doors +density +##di +crowd +illinois +stomach +tone +unique +reports +anyway +##ir +liberal +der +vehicle +thick +dry +drug +faced +largely +facility +theme +holds +creation +strange +colonel +##mi 
[vocabulary diff continues: thousands of added lines of a BERT-style WordPiece vocabulary, one "+token" per line — frequency-ordered English words (revolution, bell, politics, ...), years and round numbers (1937, 700, ...), and ##-prefixed subword continuations (##est, ##ing, ##₂, ...) — collapsed here]
+jacobs +carlton +abundant +stereo +boost +madras +inning +##hia +spur +ip +malayalam +begged +osaka +groan +escaping +charging +dose +vista +##aj +bud +papa +communists +advocates +edged +tri +##cent +resemble +peaking +necklace +fried +montenegro +saxony +goose +glances +stuttgart +curator +recruit +grocery +sympathetic +##tting +##fort +127 +lotus +randolph +ancestor +##rand +succeeding +jupiter +1798 +macedonian +##heads +hiking +1808 +handing +fischer +##itive +garbage +node +##pies +prone +singular +papua +inclined +attractions +italia +pouring +motioned +grandma +garnered +jacksonville +corp +ego +ringing +aluminum +##hausen +ordering +##foot +drawer +traders +synagogue +##play +##kawa +resistant +wandering +fragile +fiona +teased +var +hardcore +soaked +jubilee +decisive +exposition +mercer +poster +valencia +hale +kuwait +1811 +##ises +##wr +##eed +tavern +gamma +122 +johan +##uer +airways +amino +gil +##ury +vocational +domains +torres +##sp +generator +folklore +outcomes +##keeper +canberra +shooter +fl +beams +confrontation +##lling +##gram +feb +aligned +forestry +pipeline +jax +motorway +conception +decay +##tos +coffin +##cott +stalin +1805 +escorted +minded +##nam +sitcom +purchasing +twilight +veronica +additions +passive +tensions +straw +123 +frequencies +1804 +refugee +cultivation +##iate +christie +clary +bulletin +crept +disposal +##rich +##zong +processor +crescent +##rol +bmw +emphasized +whale +nazis +aurora +##eng +dwelling +hauled +sponsors +toledo +mega +ideology +theatres +tessa +cerambycidae +saves +turtle +cone +suspects +kara +rusty +yelling +greeks +mozart +shades +cocked +participant +##tro +shire +spit +freeze +necessity +##cos +inmates +nielsen +councillors +loaned +uncommon +omar +peasants +botanical +offspring +daniels +formations +jokes +1794 +pioneers +sigma +licensing +##sus +wheelchair +polite +1807 +liquor +pratt +trustee +##uta +forewings +balloon +##zz +kilometre +camping +explicit +casually +shawn +foolish +teammates +nm +hassan +carrie +judged +satisfy +vanessa +knives +selective +cnn +flowed +##lice +eclipse +stressed +eliza +mathematician +cease +cultivated +##roy +commissions +browns +##ania +destroyers +sheridan +meadow +##rius +minerals +##cial +downstream +clash +gram +memoirs +ventures +baha +seymour +archie +midlands +edith +fare +flynn +invite +canceled +tiles +stabbed +boulder +incorporate +amended +camden +facial +mollusk +unreleased +descriptions +yoga +grabs +550 +raises +ramp +shiver +##rose +coined +pioneering +tunes +qing +warwick +tops +119 +melanie +giles +##rous +wandered +##inal +annexed +nov +30th +unnamed +##ished +organizational +airplane +normandy +stoke +whistle +blessing +violations +chased +holders +shotgun +##ctic +outlet +reactor +##vik +tires +tearing +shores +fortified +mascot +constituencies +nc +columnist +productive +tibet +##rta +lineage +hooked +oct +tapes +judging +cody +##gger +hansen +kashmir +triggered +##eva +solved +cliffs +##tree +resisted +anatomy +protesters +transparent +implied +##iga +injection +mattress +excluding +##mbo +defenses +helpless +devotion +##elli +growl +liberals +weber +phenomena +atoms +plug +##iff +mortality +apprentice +howe +convincing +aaa +swimmer +barber +leone +promptly +sodium +def +nowadays +arise +##oning +gloucester +corrected +dignity +norm +erie +##ders +elders +evacuated +sylvia +compression +##yar +hartford +pose +backpack +reasoning +accepts +24th +wipe +millimetres +marcel +##oda +dodgers +albion +1790 +overwhelmed +aerospace +oaks +1795 +showcase +acknowledge 
+recovering +nolan +ashe +hurts +geology +fashioned +disappearance +farewell +swollen +shrug +marquis +wimbledon +124 +rue +1792 +commemorate +reduces +experiencing +inevitable +calcutta +intel +##court +murderer +sticking +fisheries +imagery +bloom +280 +brake +##inus +gustav +hesitation +memorable +po +viral +beans +accidents +tunisia +antenna +spilled +consort +treatments +aye +perimeter +##gard +donation +hostage +migrated +banker +addiction +apex +lil +trout +##ously +conscience +##nova +rams +sands +genome +passionate +troubles +##lets +##set +amid +##ibility +##ret +higgins +exceed +vikings +##vie +payne +##zan +muscular +##ste +defendant +sucking +##wal +ibrahim +fuselage +claudia +vfl +europeans +snails +interval +##garh +preparatory +statewide +tasked +lacrosse +viktor +##lation +angola +##hra +flint +implications +employs +teens +patrons +stall +weekends +barriers +scrambled +nucleus +tehran +jenna +parsons +lifelong +robots +displacement +5000 +##bles +precipitation +##gt +knuckles +clutched +1802 +marrying +ecology +marx +accusations +declare +scars +kolkata +mat +meadows +bermuda +skeleton +finalists +vintage +crawl +coordinate +affects +subjected +orchestral +mistaken +##tc +mirrors +dipped +relied +260 +arches +candle +##nick +incorporating +wildly +fond +basilica +owl +fringe +rituals +whispering +stirred +feud +tertiary +slick +goat +honorable +whereby +skip +ricardo +stripes +parachute +adjoining +submerged +synthesizer +##gren +intend +positively +ninety +phi +beaver +partition +fellows +alexis +prohibition +carlisle +bizarre +fraternity +##bre +doubts +icy +cbc +aquatic +sneak +sonny +combines +airports +crude +supervised +spatial +merge +alfonso +##bic +corrupt +scan +undergo +##ams +disabilities +colombian +comparing +dolphins +perkins +##lish +reprinted +unanimous +bounced +hairs +underworld +midwest +semester +bucket +paperback +miniseries +coventry +demise +##leigh +demonstrations +sensor +rotating +yan +##hler +arrange +soils +##idge +hyderabad +labs +##dr +brakes +grandchildren +##nde +negotiated +rover +ferrari +continuation +directorate +augusta +stevenson +counterpart +gore +##rda +nursery +rican +ave +collectively +broadly +pastoral +repertoire +asserted +discovering +nordic +styled +fiba +cunningham +harley +middlesex +survives +tumor +tempo +zack +aiming +lok +urgent +##rade +##nto +devils +##ement +contractor +turin +##wl +##ool +bliss +repaired +simmons +moan +astronomical +cr +negotiate +lyric +1890s +lara +bred +clad +angus +pbs +##ience +engineered +posed +##lk +hernandez +possessions +elbows +psychiatric +strokes +confluence +electorate +lifts +campuses +lava +alps +##ep +##ution +##date +physicist +woody +##page +##ographic +##itis +juliet +reformation +sparhawk +320 +complement +suppressed +jewel +##½ +floated +##kas +continuity +sadly +##ische +inability +melting +scanning +paula +flour +judaism +safer +vague +##lm +solving +curb +##stown +financially +gable +bees +expired +miserable +cassidy +dominion +1789 +cupped +145 +robbery +facto +amos +warden +resume +tallest +marvin +ing +pounded +usd +declaring +gasoline +##aux +darkened +270 +650 +sophomore +##mere +erection +gossip +televised +risen +dial +##eu +pillars +##link +passages +profound +##tina +arabian +ashton +silicon +nail +##ead +##lated +##wer +##hardt +fleming +firearms +ducked +circuits +blows +waterloo +titans +##lina +atom +fireplace +cheshire +financed +activation +algorithms +##zzi +constituent +catcher +cherokee +partnerships +sexuality +platoon +tragic +vivian +guarded +whiskey 
+meditation +poetic +##late +##nga +##ake +porto +listeners +dominance +kendra +mona +chandler +factions +22nd +salisbury +attitudes +derivative +##ido +##haus +intake +paced +javier +illustrator +barrels +bias +cockpit +burnett +dreamed +ensuing +##anda +receptors +someday +hawkins +mattered +##lal +slavic +1799 +jesuit +cameroon +wasted +tai +wax +lowering +victorious +freaking +outright +hancock +librarian +sensing +bald +calcium +myers +tablet +announcing +barack +shipyard +pharmaceutical +##uan +greenwich +flush +medley +patches +wolfgang +pt +speeches +acquiring +exams +nikolai +##gg +hayden +kannada +##type +reilly +##pt +waitress +abdomen +devastated +capped +pseudonym +pharmacy +fulfill +paraguay +1796 +clicked +##trom +archipelago +syndicated +##hman +lumber +orgasm +rejection +clifford +lorraine +advent +mafia +rodney +brock +##ght +##used +##elia +cassette +chamberlain +despair +mongolia +sensors +developmental +upstream +##eg +##alis +spanning +165 +trombone +basque +seeded +interred +renewable +rhys +leapt +revision +molecule +##ages +chord +vicious +nord +shivered +23rd +arlington +debts +corpus +sunrise +bays +blackburn +centimetres +##uded +shuddered +gm +strangely +gripping +cartoons +isabelle +orbital +##ppa +seals +proving +##lton +refusal +strengthened +bust +assisting +baghdad +batsman +portrayal +mara +pushes +spears +og +##cock +reside +nathaniel +brennan +1776 +confirmation +caucus +##worthy +markings +yemen +nobles +ku +lazy +viewer +catalan +encompasses +sawyer +##fall +sparked +substances +patents +braves +arranger +evacuation +sergio +persuade +dover +tolerance +penguin +cum +jockey +insufficient +townships +occupying +declining +plural +processed +projection +puppet +flanders +introduces +liability +##yon +gymnastics +antwerp +taipei +hobart +candles +jeep +wes +observers +126 +chaplain +bundle +glorious +##hine +hazel +flung +sol +excavations +dumped +stares +sh +bangalore +triangular +icelandic +intervals +expressing +turbine +##vers +songwriting +crafts +##igo +jasmine +ditch +rite +##ways +entertaining +comply +sorrow +wrestlers +basel +emirates +marian +rivera +helpful +##some +caution +downward +networking +##atory +##tered +darted +genocide +emergence +replies +specializing +spokesman +convenient +unlocked +fading +augustine +concentrations +resemblance +elijah +investigator +andhra +##uda +promotes +bean +##rrell +fleeing +wan +simone +announcer +##ame +##bby +lydia +weaver +132 +residency +modification +##fest +stretches +##ast +alternatively +nat +lowe +lacks +##ented +pam +tile +concealed +inferior +abdullah +residences +tissues +vengeance +##ided +moisture +peculiar +groove +zip +bologna +jennings +ninja +oversaw +zombies +pumping +batch +livingston +emerald +installations +1797 +peel +nitrogen +rama +##fying +##star +schooling +strands +responding +werner +##ost +lime +casa +accurately +targeting +##rod +underway +##uru +hemisphere +lester +##yard +occupies +2d +griffith +angrily +reorganized +##owing +courtney +deposited +##dd +##30 +estadio +##ifies +dunn +exiled +##ying +checks +##combe +##о +##fly +successes +unexpectedly +blu +assessed +##flower +##ه +observing +sacked +spiders +kn +##tail +mu +nodes +prosperity +audrey +divisional +155 +broncos +tangled +adjust +feeds +erosion +paolo +surf +directory +snatched +humid +admiralty +screwed +gt +reddish +##nese +modules +trench +lamps +bind +leah +bucks +competes +##nz +##form +transcription +##uc +isles +violently +clutching +pga +cyclist +inflation +flats +ragged +unnecessary +##hian +stubborn 
+coordinated +harriet +baba +disqualified +330 +insect +wolfe +##fies +reinforcements +rocked +duel +winked +embraced +bricks +##raj +hiatus +defeats +pending +brightly +jealousy +##xton +##hm +##uki +lena +gdp +colorful +##dley +stein +kidney +##shu +underwear +wanderers +##haw +##icus +guardians +m³ +roared +habits +##wise +permits +gp +uranium +punished +disguise +bundesliga +elise +dundee +erotic +partisan +pi +collectors +float +individually +rendering +behavioral +bucharest +ser +hare +valerie +corporal +nutrition +proportional +##isa +immense +##kis +pavement +##zie +##eld +sutherland +crouched +1775 +##lp +suzuki +trades +endurance +operas +crosby +prayed +priory +rory +socially +##urn +gujarat +##pu +walton +cube +pasha +privilege +lennon +floods +thorne +waterfall +nipple +scouting +approve +##lov +minorities +voter +dwight +extensions +assure +ballroom +slap +dripping +privileges +rejoined +confessed +demonstrating +patriotic +yell +investor +##uth +pagan +slumped +squares +##cle +##kins +confront +bert +embarrassment +##aid +aston +urging +sweater +starr +yuri +brains +williamson +commuter +mortar +structured +selfish +exports +##jon +cds +##him +unfinished +##rre +mortgage +destinations +##nagar +canoe +solitary +buchanan +delays +magistrate +fk +##pling +motivation +##lier +##vier +recruiting +assess +##mouth +malik +antique +1791 +pius +rahman +reich +tub +zhou +smashed +airs +galway +xii +conditioning +honduras +discharged +dexter +##pf +lionel +129 +debates +lemon +tiffany +volunteered +dom +dioxide +procession +devi +sic +tremendous +advertisements +colts +transferring +verdict +hanover +decommissioned +utter +relate +pac +racism +##top +beacon +limp +similarity +terra +occurrence +ant +##how +becky +capt +updates +armament +richie +pal +##graph +halloween +mayo +##ssen +##bone +cara +serena +fcc +dolls +obligations +##dling +violated +lafayette +jakarta +exploitation +##ime +infamous +iconic +##lah +##park +kitty +moody +reginald +dread +spill +crystals +olivier +modeled +bluff +equilibrium +separating +notices +ordnance +extinction +onset +cosmic +attachment +sammy +expose +privy +anchored +##bil +abbott +admits +bending +baritone +emmanuel +policeman +vaughan +winged +climax +dresses +denny +polytechnic +mohamed +burmese +authentic +nikki +genetics +grandparents +homestead +gaza +postponed +metacritic +una +##sby +##bat +unstable +dissertation +##rial +##cian +curls +obscure +uncovered +bronx +praying +disappearing +##hoe +prehistoric +coke +turret +mutations +nonprofit +pits +monaco +##ي +##usion +prominently +dispatched +podium +##mir +uci +##uation +133 +fortifications +birthplace +kendall +##lby +##oll +preacher +rack +goodman +##rman +persistent +##ott +countless +jaime +recorder +lexington +persecution +jumps +renewal +wagons +##11 +crushing +##holder +decorations +##lake +abundance +wrath +laundry +£1 +garde +##rp +jeanne +beetles +peasant +##sl +splitting +caste +sergei +##rer +##ema +scripts +##ively +rub +satellites +##vor +inscribed +verlag +scrapped +gale +packages +chick +potato +slogan +kathleen +arabs +##culture +counterparts +reminiscent +choral +##tead +rand +retains +bushes +dane +accomplish +courtesy +closes +##oth +slaughter +hague +krakow +lawson +tailed +elias +ginger +##ttes +canopy +betrayal +rebuilding +turf +##hof +frowning +allegiance +brigades +kicks +rebuild +polls +alias +nationalism +td +rowan +audition +bowie +fortunately +recognizes +harp +dillon +horrified +##oro +renault +##tics +ropes +##α +presumed +rewarded +infrared +wiping 
+accelerated +illustration +##rid +presses +practitioners +badminton +##iard +detained +##tera +recognizing +relates +misery +##sies +##tly +reproduction +piercing +potatoes +thornton +esther +manners +hbo +##aan +ours +bullshit +ernie +perennial +sensitivity +illuminated +rupert +##jin +##iss +##ear +rfc +nassau +##dock +staggered +socialism +##haven +appointments +nonsense +prestige +sharma +haul +##tical +solidarity +gps +##ook +##rata +igor +pedestrian +##uit +baxter +tenants +wires +medication +unlimited +guiding +impacts +diabetes +##rama +sasha +pas +clive +extraction +131 +continually +constraints +##bilities +sonata +hunted +sixteenth +chu +planting +quote +mayer +pretended +abs +spat +##hua +ceramic +##cci +curtains +pigs +pitching +##dad +latvian +sore +dayton +##sted +##qi +patrols +slice +playground +##nted +shone +stool +apparatus +inadequate +mates +treason +##ija +desires +##liga +##croft +somalia +laurent +mir +leonardo +oracle +grape +obliged +chevrolet +thirteenth +stunning +enthusiastic +##ede +accounted +concludes +currents +basil +##kovic +drought +##rica +mai +##aire +shove +posting +##shed +pilgrimage +humorous +packing +fry +pencil +wines +smells +144 +marilyn +aching +newest +clung +bon +neighbours +sanctioned +##pie +mug +##stock +drowning +##mma +hydraulic +##vil +hiring +reminder +lilly +investigators +##ncies +sour +##eous +compulsory +packet +##rion +##graphic +##elle +cannes +##inate +depressed +##rit +heroic +importantly +theresa +##tled +conway +saturn +marginal +rae +##xia +corresponds +royce +pact +jasper +explosives +packaging +aluminium +##ttered +denotes +rhythmic +spans +assignments +hereditary +outlined +originating +sundays +lad +reissued +greeting +beatrice +##dic +pillar +marcos +plots +handbook +alcoholic +judiciary +avant +slides +extract +masculine +blur +##eum +##force +homage +trembled +owens +hymn +trey +omega +signaling +socks +accumulated +reacted +attic +theo +lining +angie +distraction +primera +talbot +##key +1200 +ti +creativity +billed +##hey +deacon +eduardo +identifies +proposition +dizzy +gunner +hogan +##yam +##pping +##hol +ja +##chan +jensen +reconstructed +##berger +clearance +darius +##nier +abe +harlem +plea +dei +circled +emotionally +notation +fascist +neville +exceeded +upwards +viable +ducks +##fo +workforce +racer +limiting +shri +##lson +possesses +1600 +kerr +moths +devastating +laden +disturbing +locking +##cture +gal +fearing +accreditation +flavor +aide +1870s +mountainous +##baum +melt +##ures +motel +texture +servers +soda +##mb +herd +##nium +erect +puzzled +hum +peggy +examinations +gould +testified +geoff +ren +devised +sacks +##law +denial +posters +grunted +cesar +tutor +ec +gerry +offerings +byrne +falcons +combinations +ct +incoming +pardon +rocking +26th +avengers +flared +mankind +seller +uttar +loch +nadia +stroking +exposing +##hd +fertile +ancestral +instituted +##has +noises +prophecy +taxation +eminent +vivid +pol +##bol +dart +indirect +multimedia +notebook +upside +displaying +adrenaline +referenced +geometric +##iving +progression +##ddy +blunt +announce +##far +implementing +##lav +aggression +liaison +cooler +cares +headache +plantations +gorge +dots +impulse +thickness +ashamed +averaging +kathy +obligation +precursor +137 +fowler +symmetry +thee +225 +hears +##rai +undergoing +ads +butcher +bowler +##lip +cigarettes +subscription +goodness +##ically +browne +##hos +##tech +kyoto +donor +##erty +damaging +friction +drifting +expeditions +hardened +prostitution +152 +fauna +blankets +claw 
+tossing +snarled +butterflies +recruits +investigative +coated +healed +138 +communal +hai +xiii +academics +boone +psychologist +restless +lahore +stephens +mba +brendan +foreigners +printer +##pc +ached +explode +27th +deed +scratched +dared +##pole +cardiac +1780 +okinawa +proto +commando +compelled +oddly +electrons +##base +replica +thanksgiving +##rist +sheila +deliberate +stafford +tidal +representations +hercules +ou +##path +##iated +kidnapping +lenses +##tling +deficit +samoa +mouths +consuming +computational +maze +granting +smirk +razor +fixture +ideals +inviting +aiden +nominal +##vs +issuing +julio +pitt +ramsey +docks +##oss +exhaust +##owed +bavarian +draped +anterior +mating +ethiopian +explores +noticing +##nton +discarded +convenience +hoffman +endowment +beasts +cartridge +mormon +paternal +probe +sleeves +interfere +lump +deadline +##rail +jenks +bulldogs +scrap +alternating +justified +reproductive +nam +seize +descending +secretariat +kirby +coupe +grouped +smash +panther +sedan +tapping +##18 +lola +cheer +germanic +unfortunate +##eter +unrelated +##fan +subordinate +##sdale +suzanne +advertisement +##ility +horsepower +##lda +cautiously +discourse +luigi +##mans +##fields +noun +prevalent +mao +schneider +everett +surround +governorate +kira +##avia +westward +##take +misty +rails +sustainability +134 +unused +##rating +packs +toast +unwilling +regulate +thy +suffrage +nile +awe +assam +definitions +travelers +affordable +##rb +conferred +sells +undefeated +beneficial +torso +basal +repeating +remixes +##pass +bahrain +cables +fang +##itated +excavated +numbering +statutory +##rey +deluxe +##lian +forested +ramirez +derbyshire +zeus +slamming +transfers +astronomer +banana +lottery +berg +histories +bamboo +##uchi +resurrection +posterior +bowls +vaguely +##thi +thou +preserving +tensed +offence +##inas +meyrick +callum +ridden +watt +langdon +tying +lowland +snorted +daring +truman +##hale +##girl +aura +overly +filing +weighing +goa +infections +philanthropist +saunders +eponymous +##owski +latitude +perspectives +reviewing +mets +commandant +radial +##kha +flashlight +reliability +koch +vowels +amazed +ada +elaine +supper +##rth +##encies +predator +debated +soviets +cola +##boards +##nah +compartment +crooked +arbitrary +fourteenth +##ctive +havana +majors +steelers +clips +profitable +ambush +exited +packers +##tile +nude +cracks +fungi +##е +limb +trousers +josie +shelby +tens +frederic +##ος +definite +smoothly +constellation +insult +baton +discs +lingering +##nco +conclusions +lent +staging +becker +grandpa +shaky +##tron +einstein +obstacles +sk +adverse +elle +economically +##moto +mccartney +thor +dismissal +motions +readings +nostrils +treatise +##pace +squeezing +evidently +prolonged +1783 +venezuelan +je +marguerite +beirut +takeover +shareholders +##vent +denise +digit +airplay +norse +##bbling +imaginary +pills +hubert +blaze +vacated +eliminating +##ello +vine +mansfield +##tty +retrospective +barrow +borne +clutch +bail +forensic +weaving +##nett +##witz +desktop +citadel +promotions +worrying +dorset +ieee +subdivided +##iating +manned +expeditionary +pickup +synod +chuckle +185 +barney +##rz +##ffin +functionality +karachi +litigation +meanings +uc +lick +turbo +anders +##ffed +execute +curl +oppose +ankles +typhoon +##د +##ache +##asia +linguistics +compassion +pressures +grazing +perfection +##iting +immunity +monopoly +muddy +backgrounds +136 +namibia +francesca +monitors +attracting +stunt +tuition +##ии +vegetable +##mates +##quent +mgm 
+jen +complexes +forts +##ond +cellar +bites +seventeenth +royals +flemish +failures +mast +charities +##cular +peruvian +capitals +macmillan +ipswich +outward +frigate +postgraduate +folds +employing +##ouse +concurrently +fiery +##tai +contingent +nightmares +monumental +nicaragua +##kowski +lizard +mal +fielding +gig +reject +##pad +harding +##ipe +coastline +##cin +##nos +beethoven +humphrey +innovations +##tam +##nge +norris +doris +solicitor +huang +obey +141 +##lc +niagara +##tton +shelves +aug +bourbon +curry +nightclub +specifications +hilton +##ndo +centennial +dispersed +worm +neglected +briggs +sm +font +kuala +uneasy +plc +##nstein +##bound +##aking +##burgh +awaiting +pronunciation +##bbed +##quest +eh +optimal +zhu +raped +greens +presided +brenda +worries +##life +venetian +marxist +turnout +##lius +refined +braced +sins +grasped +sunderland +nickel +speculated +lowell +cyrillic +communism +fundraising +resembling +colonists +mutant +freddie +usc +##mos +gratitude +##run +mural +##lous +chemist +wi +reminds +28th +steals +tess +pietro +##ingen +promoter +ri +microphone +honoured +rai +sant +##qui +feather +##nson +burlington +kurdish +terrorists +deborah +sickness +##wed +##eet +hazard +irritated +desperation +veil +clarity +##rik +jewels +xv +##gged +##ows +##cup +berkshire +unfair +mysteries +orchid +winced +exhaustion +renovations +stranded +obe +infinity +##nies +adapt +redevelopment +thanked +registry +olga +domingo +noir +tudor +ole +##atus +commenting +behaviors +##ais +crisp +pauline +probable +stirling +wigan +##bian +paralympics +panting +surpassed +##rew +luca +barred +pony +famed +##sters +cassandra +waiter +carolyn +exported +##orted +andres +destructive +deeds +jonah +castles +vacancy +suv +##glass +1788 +orchard +yep +famine +belarusian +sprang +##forth +skinny +##mis +administrators +rotterdam +zambia +zhao +boiler +discoveries +##ride +##physics +lucius +disappointing +outreach +spoon +##frame +qualifications +unanimously +enjoys +regency +##iidae +stade +realism +veterinary +rodgers +dump +alain +chestnut +castile +censorship +rumble +gibbs +##itor +communion +reggae +inactivated +logs +loads +##houses +homosexual +##iano +ale +informs +##cas +phrases +plaster +linebacker +ambrose +kaiser +fascinated +850 +limerick +recruitment +forge +mastered +##nding +leinster +rooted +threaten +##strom +borneo +##hes +suggestions +scholarships +propeller +documentaries +patronage +coats +constructing +invest +neurons +comet +entirety +shouts +identities +annoying +unchanged +wary +##antly +##ogy +neat +oversight +##kos +phillies +replay +constance +##kka +incarnation +humble +skies +minus +##acy +smithsonian +##chel +guerrilla +jar +cadets +##plate +surplus +audit +##aru +cracking +joanna +louisa +pacing +##lights +intentionally +##iri +diner +nwa +imprint +australians +tong +unprecedented +bunker +naive +specialists +ark +nichols +railing +leaked +pedal +##uka +shrub +longing +roofs +v8 +captains +neural +tuned +##ntal +##jet +emission +medina +frantic +codex +definitive +sid +abolition +intensified +stocks +enrique +sustain +genoa +oxide +##written +clues +cha +##gers +tributaries +fragment +venom +##rity +##ente +##sca +muffled +vain +sire +laos +##ingly +##hana +hastily +snapping +surfaced +sentiment +motive +##oft +contests +approximate +mesa +luckily +dinosaur +exchanges +propelled +accord +bourne +relieve +tow +masks +offended +##ues +cynthia +##mmer +rains +bartender +zinc +reviewers +lois +##sai +legged +arrogant +rafe +rosie +comprise +handicap +blockade 
+inlet +lagoon +copied +drilling +shelley +petals +##inian +mandarin +obsolete +##inated +onward +arguably +productivity +cindy +praising +seldom +busch +discusses +raleigh +shortage +ranged +stanton +encouragement +firstly +conceded +overs +temporal +##uke +cbe +##bos +woo +certainty +pumps +##pton +stalked +##uli +lizzie +periodic +thieves +weaker +##night +gases +shoving +chooses +wc +##chemical +prompting +weights +##kill +robust +flanked +sticky +hu +tuberculosis +##eb +##eal +christchurch +resembled +wallet +reese +inappropriate +pictured +distract +fixing +fiddle +giggled +burger +heirs +hairy +mechanic +torque +apache +obsessed +chiefly +cheng +logging +##tag +extracted +meaningful +numb +##vsky +gloucestershire +reminding +##bay +unite +##lit +breeds +diminished +clown +glove +1860s +##ن +##ug +archibald +focal +freelance +sliced +depiction +##yk +organism +switches +sights +stray +crawling +##ril +lever +leningrad +interpretations +loops +anytime +reel +alicia +delighted +##ech +inhaled +xiv +suitcase +bernie +vega +licenses +northampton +exclusion +induction +monasteries +racecourse +homosexuality +##right +##sfield +##rky +dimitri +michele +alternatives +ions +commentators +genuinely +objected +pork +hospitality +fencing +stephan +warships +peripheral +wit +drunken +wrinkled +quentin +spends +departing +chung +numerical +spokesperson +##zone +johannesburg +caliber +killers +##udge +assumes +neatly +demographic +abigail +bloc +##vel +mounting +##lain +bentley +slightest +xu +recipients +##jk +merlin +##writer +seniors +prisons +blinking +hindwings +flickered +kappa +##hel +80s +strengthening +appealing +brewing +gypsy +mali +lashes +hulk +unpleasant +harassment +bio +treaties +predict +instrumentation +pulp +troupe +boiling +mantle +##ffe +ins +##vn +dividing +handles +verbs +##onal +coconut +senegal +340 +thorough +gum +momentarily +##sto +cocaine +panicked +destined +##turing +teatro +denying +weary +captained +mans +##hawks +##code +wakefield +bollywood +thankfully +##16 +cyril +##wu +amendments +##bahn +consultation +stud +reflections +kindness +1787 +internally +##ovo +tex +mosaic +distribute +paddy +seeming +143 +##hic +piers +##15 +##mura +##verse +popularly +winger +kang +sentinel +mccoy +##anza +covenant +##bag +verge +fireworks +suppress +thrilled +dominate +##jar +swansea +##60 +142 +reconciliation +##ndi +stiffened +cue +dorian +##uf +damascus +amor +ida +foremost +##aga +porsche +unseen +dir +##had +##azi +stony +lexi +melodies +##nko +angular +integer +podcast +ants +inherent +jaws +justify +persona +##olved +josephine +##nr +##ressed +customary +flashes +gala +cyrus +glaring +backyard +ariel +physiology +greenland +html +stir +avon +atletico +finch +methodology +ked +##lent +mas +catholicism +townsend +branding +quincy +fits +containers +1777 +ashore +aragon +##19 +forearm +poisoning +##sd +adopting +conquer +grinding +amnesty +keller +finances +evaluate +forged +lankan +instincts +##uto +guam +bosnian +photographed +workplace +desirable +protector +##dog +allocation +intently +encourages +willy +##sten +bodyguard +electro +brighter +##ν +bihar +##chev +lasts +opener +amphibious +sal +verde +arte +##cope +captivity +vocabulary +yields +##tted +agreeing +desmond +pioneered +##chus +strap +campaigned +railroads +##ович +emblem +##dre +stormed +501 +##ulous +marijuana +northumberland +##gn +##nath +bowen +landmarks +beaumont +##qua +danube +##bler +attorneys +th +ge +flyers +critique +villains +cass +mutation +acc +##0s +colombo +mckay +motif +sampling +concluding 
+syndicate +##rell +neon +stables +ds +warnings +clint +mourning +wilkinson +##tated +merrill +leopard +evenings +exhaled +emil +sonia +ezra +discrete +stove +farrell +fifteenth +prescribed +superhero +##rier +worms +helm +wren +##duction +##hc +expo +##rator +hq +unfamiliar +antony +prevents +acceleration +fiercely +mari +painfully +calculations +cheaper +ign +clifton +irvine +davenport +mozambique +##np +pierced +##evich +wonders +##wig +##cate +##iling +crusade +ware +##uel +enzymes +reasonably +mls +##coe +mater +ambition +bunny +eliot +kernel +##fin +asphalt +headmaster +torah +aden +lush +pins +waived +##care +##yas +joao +substrate +enforce +##grad +##ules +alvarez +selections +epidemic +tempted +##bit +bremen +translates +ensured +waterfront +29th +forrest +manny +malone +kramer +reigning +cookies +simpler +absorption +205 +engraved +##ffy +evaluated +1778 +haze +146 +comforting +crossover +##abe +thorn +##rift +##imo +##pop +suppression +fatigue +cutter +##tr +201 +wurttemberg +##orf +enforced +hovering +proprietary +gb +samurai +syllable +ascent +lacey +tick +lars +tractor +merchandise +rep +bouncing +defendants +##yre +huntington +##ground +##oko +standardized +##hor +##hima +assassinated +nu +predecessors +rainy +liar +assurance +lyrical +##uga +secondly +flattened +ios +parameter +undercover +##mity +bordeaux +punish +ridges +markers +exodus +inactive +hesitate +debbie +nyc +pledge +savoy +nagar +offset +organist +##tium +hesse +marin +converting +##iver +diagram +propulsion +pu +validity +reverted +supportive +##dc +ministries +clans +responds +proclamation +##inae +##ø +##rea +ein +pleading +patriot +sf +birch +islanders +strauss +hates +##dh +brandenburg +concession +rd +##ob +1900s +killings +textbook +antiquity +cinematography +wharf +embarrassing +setup +creed +farmland +inequality +centred +signatures +fallon +370 +##ingham +##uts +ceylon +gazing +directive +laurie +##tern +globally +##uated +##dent +allah +excavation +threads +##cross +148 +frantically +icc +utilize +determines +respiratory +thoughtful +receptions +##dicate +merging +chandra +seine +147 +builders +builds +diagnostic +dev +visibility +goddamn +analyses +dhaka +cho +proves +chancel +concurrent +curiously +canadians +pumped +restoring +1850s +turtles +jaguar +sinister +spinal +traction +declan +vows +1784 +glowed +capitalism +swirling +install +universidad +##lder +##oat +soloist +##genic +##oor +coincidence +beginnings +nissan +dip +resorts +caucasus +combustion +infectious +##eno +pigeon +serpent +##itating +conclude +masked +salad +jew +##gr +surreal +toni +##wc +harmonica +151 +##gins +##etic +##coat +fishermen +intending +bravery +##wave +klaus +titan +wembley +taiwanese +ransom +40th +incorrect +hussein +eyelids +jp +cooke +dramas +utilities +##etta +##print +eisenhower +principally +granada +lana +##rak +openings +concord +##bl +bethany +connie +morality +sega +##mons +##nard +earnings +##kara +##cine +wii +communes +##rel +coma +composing +softened +severed +grapes +##17 +nguyen +analyzed +warlord +hubbard +heavenly +behave +slovenian +##hit +##ony +hailed +filmmakers +trance +caldwell +skye +unrest +coward +likelihood +##aging +bern +sci +taliban +honolulu +propose +##wang +1700 +browser +imagining +cobra +contributes +dukes +instinctively +conan +violinist +##ores +accessories +gradual +##amp +quotes +sioux +##dating +undertake +intercepted +sparkling +compressed +139 +fungus +tombs +haley +imposing +rests +degradation +lincolnshire +retailers +wetlands +tulsa +distributor +dungeon +nun 
+greenhouse +convey +atlantis +aft +exits +oman +dresser +lyons +##sti +joking +eddy +judgement +omitted +digits +##cts +##game +juniors +##rae +cents +stricken +une +##ngo +wizards +weir +breton +nan +technician +fibers +liking +royalty +##cca +154 +persia +terribly +magician +##rable +##unt +vance +cafeteria +booker +camille +warmer +##static +consume +cavern +gaps +compass +contemporaries +foyer +soothing +graveyard +maj +plunged +blush +##wear +cascade +demonstrates +ordinance +##nov +boyle +##lana +rockefeller +shaken +banjo +izzy +##ense +breathless +vines +##32 +##eman +alterations +chromosome +dwellings +feudal +mole +153 +catalonia +relics +tenant +mandated +##fm +fridge +hats +honesty +patented +raul +heap +cruisers +accusing +enlightenment +infants +wherein +chatham +contractors +zen +affinity +hc +osborne +piston +156 +traps +maturity +##rana +lagos +##zal +peering +##nay +attendant +dealers +protocols +subset +prospects +biographical +##cre +artery +##zers +insignia +nuns +endured +##eration +recommend +schwartz +serbs +berger +cromwell +crossroads +##ctor +enduring +clasped +grounded +##bine +marseille +twitched +abel +choke +https +catalyst +moldova +italians +##tist +disastrous +wee +##oured +##nti +wwf +nope +##piration +##asa +expresses +thumbs +167 +##nza +coca +1781 +cheating +##ption +skipped +sensory +heidelberg +spies +satan +dangers +semifinal +202 +bohemia +whitish +confusing +shipbuilding +relies +surgeons +landings +ravi +baku +moor +suffix +alejandro +##yana +litre +upheld +##unk +rajasthan +##rek +coaster +insists +posture +scenarios +etienne +favoured +appoint +transgender +elephants +poked +greenwood +defences +fulfilled +militant +somali +1758 +chalk +potent +##ucci +migrants +wink +assistants +nos +restriction +activism +niger +##ario +colon +shaun +##sat +daphne +##erated +swam +congregations +reprise +considerations +magnet +playable +xvi +##р +overthrow +tobias +knob +chavez +coding +##mers +propped +katrina +orient +newcomer +##suke +temperate +##pool +farmhouse +interrogation +##vd +committing +##vert +forthcoming +strawberry +joaquin +macau +ponds +shocking +siberia +##cellular +chant +contributors +##nant +##ologists +sped +absorb +hail +1782 +spared +##hore +barbados +karate +opus +originates +saul +##xie +evergreen +leaped +##rock +correlation +exaggerated +weekday +unification +bump +tracing +brig +afb +pathways +utilizing +##ners +mod +mb +disturbance +kneeling +##stad +##guchi +100th +pune +##thy +decreasing +168 +manipulation +miriam +academia +ecosystem +occupational +rbi +##lem +rift +##14 +rotary +stacked +incorporation +awakening +generators +guerrero +racist +##omy +cyber +derivatives +culminated +allie +annals +panzer +sainte +wikipedia +pops +zu +austro +##vate +algerian +politely +nicholson +mornings +educate +tastes +thrill +dartmouth +##gating +db +##jee +regan +differing +concentrating +choreography +divinity +##media +pledged +alexandre +routing +gregor +madeline +##idal +apocalypse +##hora +gunfire +culminating +elves +fined +liang +lam +programmed +tar +guessing +transparency +gabrielle +##gna +cancellation +flexibility +##lining +accession +shea +stronghold +nets +specializes +##rgan +abused +hasan +sgt +ling +exceeding +##₄ +admiration +supermarket +##ark +photographers +specialised +tilt +resonance +hmm +perfume +380 +sami +threatens +garland +botany +guarding +boiled +greet +puppy +russo +supplier +wilmington +vibrant +vijay +##bius +paralympic +grumbled +paige +faa +licking +margins +hurricanes +##gong +fest +grenade +ripping 
+##uz +counseling +weigh +##sian +needles +wiltshire +edison +costly +##not +fulton +tramway +redesigned +staffordshire +cache +gasping +watkins +sleepy +candidacy +##group +monkeys +timeline +throbbing +##bid +##sos +berth +uzbekistan +vanderbilt +bothering +overturned +ballots +gem +##iger +sunglasses +subscribers +hooker +compelling +ang +exceptionally +saloon +stab +##rdi +carla +terrifying +rom +##vision +coil +##oids +satisfying +vendors +31st +mackay +deities +overlooked +ambient +bahamas +felipe +olympia +whirled +botanist +advertised +tugging +##dden +disciples +morales +unionist +rites +foley +morse +motives +creepy +##₀ +soo +##sz +bargain +highness +frightening +turnpike +tory +reorganization +##cer +depict +biographer +##walk +unopposed +manifesto +##gles +institut +emile +accidental +kapoor +##dam +kilkenny +cortex +lively +##13 +romanesque +jain +shan +cannons +##ood +##ske +petrol +echoing +amalgamated +disappears +cautious +proposes +sanctions +trenton +##ر +flotilla +aus +contempt +tor +canary +cote +theirs +##hun +conceptual +deleted +fascinating +paso +blazing +elf +honourable +hutchinson +##eiro +##outh +##zin +surveyor +tee +amidst +wooded +reissue +intro +##ono +cobb +shelters +newsletter +hanson +brace +encoding +confiscated +dem +caravan +marino +scroll +melodic +cows +imam +##adi +##aneous +northward +searches +biodiversity +cora +310 +roaring +##bers +connell +theologian +halo +compose +pathetic +unmarried +dynamo +##oot +az +calculation +toulouse +deserves +humour +nr +forgiveness +tam +undergone +martyr +pamela +myths +whore +counselor +hicks +290 +heavens +battleship +electromagnetic +##bbs +stellar +establishments +presley +hopped +##chin +temptation +90s +wills +nas +##yuan +nhs +##nya +seminars +##yev +adaptations +gong +asher +lex +indicator +sikh +tobago +cites +goin +##yte +satirical +##gies +characterised +correspond +bubbles +lure +participates +##vid +eruption +skate +therapeutic +1785 +canals +wholesale +defaulted +sac +460 +petit +##zzled +virgil +leak +ravens +256 +portraying +##yx +ghetto +creators +dams +portray +vicente +##rington +fae +namesake +bounty +##arium +joachim +##ota +##iser +aforementioned +axle +snout +depended +dismantled +reuben +480 +##ibly +gallagher +##lau +##pd +earnest +##ieu +##iary +inflicted +objections +##llar +asa +gritted +##athy +jericho +##sea +##was +flick +underside +ceramics +undead +substituted +195 +eastward +undoubtedly +wheeled +chimney +##iche +guinness +cb +##ager +siding +##bell +traitor +baptiste +disguised +inauguration +149 +tipperary +choreographer +perched +warmed +stationary +eco +##ike +##ntes +bacterial +##aurus +flores +phosphate +##core +attacker +invaders +alvin +intersects +a1 +indirectly +immigrated +businessmen +cornelius +valves +narrated +pill +sober +ul +nationale +monastic +applicants +scenery +##jack +161 +motifs +constitutes +cpu +##osh +jurisdictions +sd +tuning +irritation +woven +##uddin +fertility +gao +##erie +antagonist +impatient +glacial +hides +boarded +denominations +interception +##jas +cookie +nicola +##tee +algebraic +marquess +bahn +parole +buyers +bait +turbines +paperwork +bestowed +natasha +renee +oceans +purchases +157 +vaccine +215 +##tock +fixtures +playhouse +integrate +jai +oswald +intellectuals +##cky +booked +nests +mortimer +##isi +obsession +sept +##gler +##sum +440 +scrutiny +simultaneous +squinted +##shin +collects +oven +shankar +penned +remarkably +##я +slips +luggage +spectral +1786 +collaborations +louie +consolidation +##ailed +##ivating +420 +hoover 
+blackpool +harness +ignition +vest +tails +belmont +mongol +skinner +##nae +visually +mage +derry +##tism +##unce +stevie +transitional +##rdy +redskins +drying +prep +prospective +##21 +annoyance +oversee +##loaded +fills +##books +##iki +announces +fda +scowled +respects +prasad +mystic +tucson +##vale +revue +springer +bankrupt +1772 +aristotle +salvatore +habsburg +##geny +dal +natal +nut +pod +chewing +darts +moroccan +walkover +rosario +lenin +punjabi +##ße +grossed +scattering +wired +invasive +hui +polynomial +corridors +wakes +gina +portrays +##cratic +arid +retreating +erich +irwin +sniper +##dha +linen +lindsey +maneuver +butch +shutting +socio +bounce +commemorative +postseason +jeremiah +pines +275 +mystical +beads +bp +abbas +furnace +bidding +consulted +assaulted +empirical +rubble +enclosure +sob +weakly +cancel +polly +yielded +##emann +curly +prediction +battered +70s +vhs +jacqueline +render +sails +barked +detailing +grayson +riga +sloane +raging +##yah +herbs +bravo +##athlon +alloy +giggle +imminent +suffers +assumptions +waltz +##itate +accomplishments +##ited +bathing +remixed +deception +prefix +##emia +deepest +##tier +##eis +balkan +frogs +##rong +slab +##pate +philosophers +peterborough +grains +imports +dickinson +rwanda +##atics +1774 +dirk +lan +tablets +##rove +clone +##rice +caretaker +hostilities +mclean +##gre +regimental +treasures +norms +impose +tsar +tango +diplomacy +variously +complain +192 +recognise +arrests +1779 +celestial +pulitzer +##dus +bing +libretto +##moor +adele +splash +##rite +expectation +lds +confronts +##izer +spontaneous +harmful +wedge +entrepreneurs +buyer +##ope +bilingual +translate +rugged +conner +circulated +uae +eaton +##gra +##zzle +lingered +lockheed +vishnu +reelection +alonso +##oom +joints +yankee +headline +cooperate +heinz +laureate +invading +##sford +echoes +scandinavian +##dham +hugging +vitamin +salute +micah +hind +trader +##sper +radioactive +##ndra +militants +poisoned +ratified +remark +campeonato +deprived +wander +prop +##dong +outlook +##tani +##rix +##eye +chiang +darcy +##oping +mandolin +spice +statesman +babylon +182 +walled +forgetting +afro +##cap +158 +giorgio +buffer +##polis +planetary +##gis +overlap +terminals +kinda +centenary +##bir +arising +manipulate +elm +ke +1770 +ak +##tad +chrysler +mapped +moose +pomeranian +quad +macarthur +assemblies +shoreline +recalls +stratford +##rted +noticeable +##evic +imp +##rita +##sque +accustomed +supplying +tents +disgusted +vogue +sipped +filters +khz +reno +selecting +luftwaffe +mcmahon +tyne +masterpiece +carriages +collided +dunes +exercised +flare +remembers +muzzle +##mobile +heck +##rson +burgess +lunged +middleton +boycott +bilateral +##sity +hazardous +lumpur +multiplayer +spotlight +jackets +goldman +liege +porcelain +rag +waterford +benz +attracts +hopeful +battling +ottomans +kensington +baked +hymns +cheyenne +lattice +levine +borrow +polymer +clashes +michaels +monitored +commitments +denounced +##25 +##von +cavity +##oney +hobby +akin +##holders +futures +intricate +cornish +patty +##oned +illegally +dolphin +##lag +barlow +yellowish +maddie +apologized +luton +plagued +##puram +nana +##rds +sway +fanny +łodz +##rino +psi +suspicions +hanged +##eding +initiate +charlton +##por +nak +competent +235 +analytical +annex +wardrobe +reservations +##rma +sect +162 +fairfax +hedge +piled +buckingham +uneven +bauer +simplicity +snyder +interpret +accountability +donors +moderately +byrd +continents +##cite +##max +disciple +hr +jamaican +ping 
+nominees +##uss +mongolian +diver +attackers +eagerly +ideological +pillows +miracles +apartheid +revolver +sulfur +clinics +moran +163 +##enko +ile +katy +rhetoric +##icated +chronology +recycling +##hrer +elongated +mughal +pascal +profiles +vibration +databases +domination +##fare +##rant +matthias +digest +rehearsal +polling +weiss +initiation +reeves +clinging +flourished +impress +ngo +##hoff +##ume +buckley +symposium +rhythms +weed +emphasize +transforming +##taking +##gence +##yman +accountant +analyze +flicker +foil +priesthood +voluntarily +decreases +##80 +##hya +slater +sv +charting +mcgill +##lde +moreno +##iu +besieged +zur +robes +##phic +admitting +api +deported +turmoil +peyton +earthquakes +##ares +nationalists +beau +clair +brethren +interrupt +welch +curated +galerie +requesting +164 +##ested +impending +steward +viper +##vina +complaining +beautifully +brandy +foam +nl +1660 +##cake +alessandro +punches +laced +explanations +##lim +attribute +clit +reggie +discomfort +##cards +smoothed +whales +##cene +adler +countered +duffy +disciplinary +widening +recipe +reliance +conducts +goats +gradient +preaching +##shaw +matilda +quasi +striped +meridian +cannabis +cordoba +certificates +##agh +##tering +graffiti +hangs +pilgrims +repeats +##ych +revive +urine +etat +##hawk +fueled +belts +fuzzy +susceptible +##hang +mauritius +salle +sincere +beers +hooks +##cki +arbitration +entrusted +advise +sniffed +seminar +junk +donnell +processors +principality +strapped +celia +mendoza +everton +fortunes +prejudice +starving +reassigned +steamer +##lund +tuck +evenly +foreman +##ffen +dans +375 +envisioned +slit +##xy +baseman +liberia +rosemary +##weed +electrified +periodically +potassium +stride +contexts +sperm +slade +mariners +influx +bianca +subcommittee +##rane +spilling +icao +estuary +##nock +delivers +iphone +##ulata +isa +mira +bohemian +dessert +##sbury +welcoming +proudly +slowing +##chs +musee +ascension +russ +##vian +waits +##psy +africans +exploit +##morphic +gov +eccentric +crab +peck +##ull +entrances +formidable +marketplace +groom +bolted +metabolism +patton +robbins +courier +payload +endure +##ifier +andes +refrigerator +##pr +ornate +##uca +ruthless +illegitimate +masonry +strasbourg +bikes +adobe +##³ +apples +quintet +willingly +niche +bakery +corpses +energetic +##cliffe +##sser +##ards +177 +centimeters +centro +fuscous +cretaceous +rancho +##yde +andrei +telecom +tottenham +oasis +ordination +vulnerability +presiding +corey +cp +penguins +sims +##pis +malawi +piss +##48 +correction +##cked +##ffle +##ryn +countdown +detectives +psychiatrist +psychedelic +dinosaurs +blouse +##get +choi +vowed +##oz +randomly +##pol +49ers +scrub +blanche +bruins +dusseldorf +##using +unwanted +##ums +212 +dominique +elevations +headlights +om +laguna +##oga +1750 +famously +ignorance +shrewsbury +##aine +ajax +breuning +che +confederacy +greco +overhaul +##screen +paz +skirts +disagreement +cruelty +jagged +phoebe +shifter +hovered +viruses +##wes +mandy +##lined +##gc +landlord +squirrel +dashed +##ι +ornamental +gag +wally +grange +literal +spurs +undisclosed +proceeding +yin +##text +billie +orphan +spanned +humidity +indy +weighted +presentations +explosions +lucian +##tary +vaughn +hindus +##anga +##hell +psycho +171 +daytona +protects +efficiently +rematch +sly +tandem +##oya +rebranded +impaired +hee +metropolis +peach +godfrey +diaspora +ethnicity +prosperous +gleaming +dar +grossing +playback +##rden +stripe +pistols +##tain +births +labelled +##cating +172 
+rudy +alba +##onne +aquarium +hostility +##gb +##tase +shudder +sumatra +hardest +lakers +consonant +creeping +demos +homicide +capsule +zeke +liberties +expulsion +pueblo +##comb +trait +transporting +##ddin +##neck +##yna +depart +gregg +mold +ledge +hangar +oldham +playboy +termination +analysts +gmbh +romero +##itic +insist +cradle +filthy +brightness +slash +shootout +deposed +bordering +##truct +isis +microwave +tumbled +sheltered +cathy +werewolves +messy +andersen +convex +clapped +clinched +satire +wasting +edo +vc +rufus +##jak +mont +##etti +poznan +##keeping +restructuring +transverse +##rland +azerbaijani +slovene +gestures +roommate +choking +shear +##quist +vanguard +oblivious +##hiro +disagreed +baptism +##lich +coliseum +##aceae +salvage +societe +cory +locke +relocation +relying +versailles +ahl +swelling +##elo +cheerful +##word +##edes +gin +sarajevo +obstacle +diverted +##nac +messed +thoroughbred +fluttered +utrecht +chewed +acquaintance +assassins +dispatch +mirza +##wart +nike +salzburg +swell +yen +##gee +idle +ligue +samson +##nds +##igh +playful +spawned +##cise +tease +##case +burgundy +##bot +stirring +skeptical +interceptions +marathi +##dies +bedrooms +aroused +pinch +##lik +preferences +tattoos +buster +digitally +projecting +rust +##ital +kitten +priorities +addison +pseudo +##guard +dusk +icons +sermon +##psis +##iba +bt +##lift +##xt +ju +truce +rink +##dah +##wy +defects +psychiatry +offences +calculate +glucose +##iful +##rized +##unda +francaise +##hari +richest +warwickshire +carly +1763 +purity +redemption +lending +##cious +muse +bruises +cerebral +aero +carving +##name +preface +terminology +invade +monty +##int +anarchist +blurred +##iled +rossi +treats +guts +shu +foothills +ballads +undertaking +premise +cecilia +affiliates +blasted +conditional +wilder +minors +drone +rudolph +buffy +swallowing +horton +attested +##hop +rutherford +howell +primetime +livery +penal +##bis +minimize +hydro +wrecked +wrought +palazzo +##gling +cans +vernacular +friedman +nobleman +shale +walnut +danielle +##ection +##tley +sears +##kumar +chords +lend +flipping +streamed +por +dracula +gallons +sacrifices +gamble +orphanage +##iman +mckenzie +##gible +boxers +daly +##balls +##ان +208 +##ific +##rative +##iq +exploited +slated +##uity +circling +hillary +pinched +goldberg +provost +campaigning +lim +piles +ironically +jong +mohan +successors +usaf +##tem +##ught +autobiographical +haute +preserves +##ending +acquitted +comparisons +203 +hydroelectric +gangs +cypriot +torpedoes +rushes +chrome +derive +bumps +instability +fiat +pets +##mbe +silas +dye +reckless +settler +##itation +info +heats +##writing +176 +canonical +maltese +fins +mushroom +stacy +aspen +avid +##kur +##loading +vickers +gaston +hillside +statutes +wilde +gail +kung +sabine +comfortably +motorcycles +##rgo +169 +pneumonia +fetch +##sonic +axel +faintly +parallels +##oop +mclaren +spouse +compton +interdisciplinary +miner +##eni +181 +clamped +##chal +##llah +separates +versa +##mler +scarborough +labrador +##lity +##osing +rutgers +hurdles +como +166 +burt +divers +##100 +wichita +cade +coincided +##erson +bruised +mla +##pper +vineyard +##ili +##brush +notch +mentioning +jase +hearted +kits +doe +##acle +pomerania +##ady +ronan +seizure +pavel +problematic +##zaki +domenico +##ulin +catering +penelope +dependence +parental +emilio +ministerial +atkinson +##bolic +clarkson +chargers +colby +grill +peeked +arises +summon +##aged +fools +##grapher +faculties +qaeda +##vial +garner +refurbished 
+[WordPiece vocabulary file: several thousand added lines, one token per line, running from "##hwa" through "asteroids". The entries are BERT-style lowercase wordpieces; a leading "##" marks a subword that continues the previous piece. Full token list elided.]
+rediscovered +buds +flea +hive +##neas +1737 +classifications +debuts +##eles +olympus +scala +##eurs +##gno +##mute +hummed +sigismund +visuals +wiggled +await +pilasters +clench +sulfate +##ances +bellevue +enigma +trainee +snort +##sw +clouded +denim +##rank +##rder +churning +hartman +lodges +riches +sima +##missible +accountable +socrates +regulates +mueller +##cr +1702 +avoids +solids +himalayas +nutrient +pup +##jevic +squat +fades +nec +##lates +##pina +##rona +##ου +privateer +tequila +##gative +##mpton +apt +hornet +immortals +##dou +asturias +cleansing +dario +##rries +##anta +etymology +servicing +zhejiang +##venor +##nx +horned +erasmus +rayon +relocating +£10 +##bags +escalated +promenade +stubble +2010s +artisans +axial +liquids +mora +sho +yoo +##tsky +bundles +oldies +##nally +notification +bastion +##ths +sparkle +##lved +1728 +leash +pathogen +highs +##hmi +immature +880 +gonzaga +ignatius +mansions +monterrey +sweets +bryson +##loe +polled +regatta +brightest +pei +rosy +squid +hatfield +payroll +addict +meath +cornerback +heaviest +lodging +##mage +capcom +rippled +##sily +barnet +mayhem +ymca +snuggled +rousseau +##cute +blanchard +284 +fragmented +leighton +chromosomes +risking +##md +##strel +##utter +corinne +coyotes +cynical +hiroshi +yeomanry +##ractive +ebook +grading +mandela +plume +agustin +magdalene +##rkin +bea +femme +trafford +##coll +##lun +##tance +52nd +fourier +upton +##mental +camilla +gust +iihf +islamabad +longevity +##kala +feldman +netting +##rization +endeavour +foraging +mfa +orr +##open +greyish +contradiction +graz +##ruff +handicapped +marlene +tweed +oaxaca +spp +campos +miocene +pri +configured +cooks +pluto +cozy +pornographic +##entes +70th +fairness +glided +jonny +lynne +rounding +sired +##emon +##nist +remade +uncover +##mack +complied +lei +newsweek +##jured +##parts +##enting +##pg +293 +finer +guerrillas +athenian +deng +disused +stepmother +accuse +gingerly +seduction +521 +confronting +##walker +##going +gora +nostalgia +sabres +virginity +wrenched +##minated +syndication +wielding +eyre +##56 +##gnon +##igny +behaved +taxpayer +sweeps +##growth +childless +gallant +##ywood +amplified +geraldine +scrape +##ffi +babylonian +fresco +##rdan +##kney +##position +1718 +restricting +tack +fukuoka +osborn +selector +partnering +##dlow +318 +gnu +kia +tak +whitley +gables +##54 +##mania +mri +softness +immersion +##bots +##evsky +1713 +chilling +insignificant +pcs +##uis +elites +lina +purported +supplemental +teaming +##americana +##dding +##inton +proficient +rouen +##nage +##rret +niccolo +selects +##bread +fluffy +1621 +gruff +knotted +mukherjee +polgara +thrash +nicholls +secluded +smoothing +thru +corsica +loaf +whitaker +inquiries +##rrier +##kam +indochina +289 +marlins +myles +peking +##tea +extracts +pastry +superhuman +connacht +vogel +##ditional +##het +##udged +##lash +gloss +quarries +refit +teaser +##alic +##gaon +20s +materialized +sling +camped +pickering +tung +tracker +pursuant +##cide +cranes +soc +##cini +##typical +##viere +anhalt +overboard +workout +chores +fares +orphaned +stains +##logie +fenton +surpassing +joyah +triggers +##itte +grandmaster +##lass +##lists +clapping +fraudulent +ledger +nagasaki +##cor +##nosis +##tsa +eucalyptus +tun +##icio +##rney +##tara +dax +heroism +ina +wrexham +onboard +unsigned +##dates +moshe +galley +winnie +droplets +exiles +praises +watered +noodles +##aia +fein +adi +leland +multicultural +stink +bingo +comets +erskine +modernized +canned +constraint +domestically 
+chemotherapy +featherweight +stifled +##mum +darkly +irresistible +refreshing +hasty +isolate +##oys +kitchener +planners +##wehr +cages +yarn +implant +toulon +elects +childbirth +yue +##lind +##lone +cn +rightful +sportsman +junctions +remodeled +specifies +##rgh +291 +##oons +complimented +##urgent +lister +ot +##logic +bequeathed +cheekbones +fontana +gabby +##dial +amadeus +corrugated +maverick +resented +triangles +##hered +##usly +nazareth +tyrol +1675 +assent +poorer +sectional +aegean +##cous +296 +nylon +ghanaian +##egorical +##weig +cushions +forbid +fusiliers +obstruction +somerville +##scia +dime +earrings +elliptical +leyte +oder +polymers +timmy +atm +midtown +piloted +settles +continual +externally +mayfield +##uh +enrichment +henson +keane +persians +1733 +benji +braden +pep +324 +##efe +contenders +pepsi +valet +##isches +298 +##asse +##earing +goofy +stroll +##amen +authoritarian +occurrences +adversary +ahmedabad +tangent +toppled +dorchester +1672 +modernism +marxism +islamist +charlemagne +exponential +racks +unicode +brunette +mbc +pic +skirmish +##bund +##lad +##powered +##yst +hoisted +messina +shatter +##ctum +jedi +vantage +##music +##neil +clemens +mahmoud +corrupted +authentication +lowry +nils +##washed +omnibus +wounding +jillian +##itors +##opped +serialized +narcotics +handheld +##arm +##plicity +intersecting +stimulating +##onis +crate +fellowships +hemingway +casinos +climatic +fordham +copeland +drip +beatty +leaflets +robber +brothel +madeira +##hedral +sphinx +ultrasound +##vana +valor +forbade +leonid +villas +##aldo +duane +marquez +##cytes +disadvantaged +forearms +kawasaki +reacts +consular +lax +uncles +uphold +##hopper +concepcion +dorsey +lass +##izan +arching +passageway +1708 +researches +tia +internationals +##graphs +##opers +distinguishes +javanese +divert +##uven +plotted +##listic +##rwin +##erik +##tify +affirmative +signifies +validation +##bson +kari +felicity +georgina +zulu +##eros +##rained +##rath +overcoming +##dot +argyll +##rbin +1734 +chiba +ratification +windy +earls +parapet +##marks +hunan +pristine +astrid +punta +##gart +brodie +##kota +##oder +malaga +minerva +rouse +##phonic +bellowed +pagoda +portals +reclamation +##gur +##odies +##⁄₄ +parentheses +quoting +allergic +palette +showcases +benefactor +heartland +nonlinear +##tness +bladed +cheerfully +scans +##ety +##hone +1666 +girlfriends +pedersen +hiram +sous +##liche +##nator +1683 +##nery +##orio +##umen +bobo +primaries +smiley +##cb +unearthed +uniformly +fis +metadata +1635 +ind +##oted +recoil +##titles +##tura +##ια +406 +hilbert +jamestown +mcmillan +tulane +seychelles +##frid +antics +coli +fated +stucco +##grants +1654 +bulky +accolades +arrays +caledonian +carnage +optimism +puebla +##tative +##cave +enforcing +rotherham +seo +dunlop +aeronautics +chimed +incline +zoning +archduke +hellenistic +##oses +##sions +candi +thong +##ople +magnate +rustic +##rsk +projective +slant +##offs +danes +hollis +vocalists +##ammed +congenital +contend +gesellschaft +##ocating +##pressive +douglass +quieter +##cm +##kshi +howled +salim +spontaneously +townsville +buena +southport +##bold +kato +1638 +faerie +stiffly +##vus +##rled +297 +flawless +realising +taboo +##7th +bytes +straightening +356 +jena +##hid +##rmin +cartwright +berber +bertram +soloists +411 +noses +417 +coping +fission +hardin +inca +##cen +1717 +mobilized +vhf +##raf +biscuits +curate +##85 +##anial +331 +gaunt +neighbourhoods +1540 +##abas +blanca +bypassed +sockets +behold +coincidentally +##bane 
+nara +shave +splinter +terrific +##arion +##erian +commonplace +juris +redwood +waistband +boxed +caitlin +fingerprints +jennie +naturalized +##ired +balfour +craters +jody +bungalow +hugely +quilt +glitter +pigeons +undertaker +bulging +constrained +goo +##sil +##akh +assimilation +reworked +##person +persuasion +##pants +felicia +##cliff +##ulent +1732 +explodes +##dun +##inium +##zic +lyman +vulture +hog +overlook +begs +northwards +ow +spoil +##urer +fatima +favorably +accumulate +sargent +sorority +corresponded +dispersal +kochi +toned +##imi +##lita +internacional +newfound +##agger +##lynn +##rigue +booths +peanuts +##eborg +medicare +muriel +nur +##uram +crates +millennia +pajamas +worsened +##breakers +jimi +vanuatu +yawned +##udeau +carousel +##hony +hurdle +##ccus +##mounted +##pod +rv +##eche +airship +ambiguity +compulsion +recapture +##claiming +arthritis +##osomal +1667 +asserting +ngc +sniffing +dade +discontent +glendale +ported +##amina +defamation +rammed +##scent +fling +livingstone +##fleet +875 +##ppy +apocalyptic +comrade +lcd +##lowe +cessna +eine +persecuted +subsistence +demi +hoop +reliefs +710 +coptic +progressing +stemmed +perpetrators +1665 +priestess +##nio +dobson +ebony +rooster +itf +tortricidae +##bbon +##jian +cleanup +##jean +##øy +1721 +eighties +taxonomic +holiness +##hearted +##spar +antilles +showcasing +stabilized +##nb +gia +mascara +michelangelo +dawned +##uria +##vinsky +extinguished +fitz +grotesque +£100 +##fera +##loid +##mous +barges +neue +throbbed +cipher +johnnie +##a1 +##mpt +outburst +##swick +spearheaded +administrations +c1 +heartbreak +pixels +pleasantly +##enay +lombardy +plush +##nsed +bobbie +##hly +reapers +tremor +xiang +minogue +substantive +hitch +barak +##wyl +kwan +##encia +910 +obscene +elegance +indus +surfer +bribery +conserve +##hyllum +##masters +horatio +##fat +apes +rebound +psychotic +##pour +iteration +##mium +##vani +botanic +horribly +antiques +dispose +paxton +##hli +##wg +timeless +1704 +disregard +engraver +hounds +##bau +##version +looted +uno +facilitates +groans +masjid +rutland +antibody +disqualification +decatur +footballers +quake +slacks +48th +rein +scribe +stabilize +commits +exemplary +tho +##hort +##chison +pantry +traversed +##hiti +disrepair +identifiable +vibrated +baccalaureate +##nnis +csa +interviewing +##iensis +##raße +greaves +wealthiest +343 +classed +jogged +£5 +##58 +##atal +illuminating +knicks +respecting +##uno +scrubbed +##iji +##dles +kruger +moods +growls +raider +silvia +chefs +kam +vr +cree +percival +##terol +gunter +counterattack +defiant +henan +ze +##rasia +##riety +equivalence +submissions +##fra +##thor +bautista +mechanically +##heater +cornice +herbal +templar +##mering +outputs +ruining +ligand +renumbered +extravagant +mika +blockbuster +eta +insurrection +##ilia +darkening +ferocious +pianos +strife +kinship +##aer +melee +##anor +##iste +##may +##oue +decidedly +weep +##jad +##missive +##ppel +354 +puget +unease +##gnant +1629 +hammering +kassel +ob +wessex +##lga +bromwich +egan +paranoia +utilization +##atable +##idad +contradictory +provoke +##ols +##ouring +##tangled +knesset +##very +##lette +plumbing +##sden +##¹ +greensboro +occult +sniff +338 +zev +beaming +gamer +haggard +mahal +##olt +##pins +mendes +utmost +briefing +gunnery +##gut +##pher +##zh +##rok +1679 +khalifa +sonya +##boot +principals +urbana +wiring +##liffe +##minating +##rrado +dahl +nyu +skepticism +np +townspeople +ithaca +lobster +somethin +##fur +##arina +##−1 +freighter +zimmerman +biceps 
+contractual +##herton +amend +hurrying +subconscious +##anal +336 +meng +clermont +spawning +##eia +##lub +dignitaries +impetus +snacks +spotting +twigs +##bilis +##cz +##ouk +libertadores +nic +skylar +##aina +##firm +gustave +asean +##anum +dieter +legislatures +flirt +bromley +trolls +umar +##bbies +##tyle +blah +parc +bridgeport +crank +negligence +##nction +46th +constantin +molded +bandages +seriousness +00pm +siegel +carpets +compartments +upbeat +statehood +##dner +##edging +marko +730 +platt +##hane +paving +##iy +1738 +abbess +impatience +limousine +nbl +##talk +441 +lucille +mojo +nightfall +robbers +##nais +karel +brisk +calves +replicate +ascribed +telescopes +##olf +intimidated +##reen +ballast +specialization +##sit +aerodynamic +caliphate +rainer +visionary +##arded +epsilon +##aday +##onte +aggregation +auditory +boosted +reunification +kathmandu +loco +robyn +402 +acknowledges +appointing +humanoid +newell +redeveloped +restraints +##tained +barbarians +chopper +1609 +italiana +##lez +##lho +investigates +wrestlemania +##anies +##bib +690 +##falls +creaked +dragoons +gravely +minions +stupidity +volley +##harat +##week +musik +##eries +##uously +fungal +massimo +semantics +malvern +##ahl +##pee +discourage +embryo +imperialism +1910s +profoundly +##ddled +jiangsu +sparkled +stat +##holz +sweatshirt +tobin +##iction +sneered +##cheon +##oit +brit +causal +smyth +##neuve +diffuse +perrin +silvio +##ipes +##recht +detonated +iqbal +selma +##nism +##zumi +roasted +##riders +tay +##ados +##mament +##mut +##rud +840 +completes +nipples +cfa +flavour +hirsch +##laus +calderon +sneakers +moravian +##ksha +1622 +rq +294 +##imeters +bodo +##isance +##pre +##ronia +anatomical +excerpt +##lke +dh +kunst +##tablished +##scoe +biomass +panted +unharmed +gael +housemates +montpellier +##59 +coa +rodents +tonic +hickory +singleton +##taro +451 +1719 +aldo +breaststroke +dempsey +och +rocco +##cuit +merton +dissemination +midsummer +serials +##idi +haji +polynomials +##rdon +gs +enoch +prematurely +shutter +taunton +£3 +##grating +##inates +archangel +harassed +##asco +326 +archway +dazzling +##ecin +1736 +sumo +wat +##kovich +1086 +honneur +##ently +##nostic +##ttal +##idon +1605 +403 +1716 +blogger +rents +##gnan +hires +##ikh +##dant +howie +##rons +handler +retracted +shocks +1632 +arun +duluth +kepler +trumpeter +##lary +peeking +seasoned +trooper +##mara +laszlo +##iciencies +##rti +heterosexual +##inatory +##ssion +indira +jogging +##inga +##lism +beit +dissatisfaction +malice +##ately +nedra +peeling +##rgeon +47th +stadiums +475 +vertigo +##ains +iced +restroom +##plify +##tub +illustrating +pear +##chner +##sibility +inorganic +rappers +receipts +watery +##kura +lucinda +##oulos +reintroduced +##8th +##tched +gracefully +saxons +nutritional +wastewater +rained +favourites +bedrock +fisted +hallways +likeness +upscale +##lateral +1580 +blinds +prequel +##pps +##tama +deter +humiliating +restraining +tn +vents +1659 +laundering +recess +rosary +tractors +coulter +federer +##ifiers +##plin +persistence +##quitable +geschichte +pendulum +quakers +##beam +bassett +pictorial +buffet +koln +##sitor +drills +reciprocal +shooters +##57 +##cton +##tees +converge +pip +dmitri +donnelly +yamamoto +aqua +azores +demographics +hypnotic +spitfire +suspend +wryly +roderick +##rran +sebastien +##asurable +mavericks +##fles +##200 +himalayan +prodigy +##iance +transvaal +demonstrators +handcuffs +dodged +mcnamara +sublime +1726 +crazed +##efined +##till +ivo +pondered +reconciled +shrill +sava 
+##duk +bal +cad +heresy +jaipur +goran +##nished +341 +lux +shelly +whitehall +##hre +israelis +peacekeeping +##wled +1703 +demetrius +ousted +##arians +##zos +beale +anwar +backstroke +raged +shrinking +cremated +##yck +benign +towing +wadi +darmstadt +landfill +parana +soothe +colleen +sidewalks +mayfair +tumble +hepatitis +ferrer +superstructure +##gingly +##urse +##wee +anthropological +translators +##mies +closeness +hooves +##pw +mondays +##roll +##vita +landscaping +##urized +purification +sock +thorns +thwarted +jalan +tiberius +##taka +saline +##rito +confidently +khyber +sculptors +##ij +brahms +hammersmith +inspectors +battista +fivb +fragmentation +hackney +##uls +arresting +exercising +antoinette +bedfordshire +##zily +dyed +##hema +1656 +racetrack +variability +##tique +1655 +austrians +deteriorating +madman +theorists +aix +lehman +weathered +1731 +decreed +eruptions +1729 +flaw +quinlan +sorbonne +flutes +nunez +1711 +adored +downwards +fable +rasped +1712 +moritz +mouthful +renegade +shivers +stunts +dysfunction +restrain +translit +327 +pancakes +##avio +##cision +##tray +351 +vial +##lden +bain +##maid +##oxide +chihuahua +malacca +vimes +##rba +##rnier +1664 +donnie +plaques +##ually +337 +bangs +floppy +huntsville +loretta +nikolay +##otte +eater +handgun +ubiquitous +##hett +eras +zodiac +1634 +##omorphic +1820s +##zog +cochran +##bula +##lithic +warring +##rada +dalai +excused +blazers +mcconnell +reeling +bot +este +##abi +geese +hoax +taxon +##bla +guitarists +##icon +condemning +hunts +inversion +moffat +taekwondo +##lvis +1624 +stammered +##rest +##rzy +sousa +fundraiser +marylebone +navigable +uptown +cabbage +daniela +salman +shitty +whimper +##kian +##utive +programmers +protections +rm +##rmi +##rued +forceful +##enes +fuss +##tao +##wash +brat +oppressive +reykjavik +spartak +ticking +##inkles +##kiewicz +adolph +horst +maui +protege +straighten +cpc +landau +concourse +clements +resultant +##ando +imaginative +joo +reactivated +##rem +##ffled +##uising +consultative +##guide +flop +kaitlyn +mergers +parenting +somber +##vron +supervise +vidhan +##imum +courtship +exemplified +harmonies +medallist +refining +##rrow +##ка +amara +##hum +780 +goalscorer +sited +overshadowed +rohan +displeasure +secretive +multiplied +osman +##orth +engravings +padre +##kali +##veda +miniatures +mis +##yala +clap +pali +rook +##cana +1692 +57th +antennae +astro +oskar +1628 +bulldog +crotch +hackett +yucatan +##sure +amplifiers +brno +ferrara +migrating +##gree +thanking +turing +##eza +mccann +ting +andersson +onslaught +gaines +ganga +incense +standardization +##mation +sentai +scuba +stuffing +turquoise +waivers +alloys +##vitt +regaining +vaults +##clops +##gizing +digger +furry +memorabilia +probing +##iad +payton +rec +deutschland +filippo +opaque +seamen +zenith +afrikaans +##filtration +disciplined +inspirational +##merie +banco +confuse +grafton +tod +##dgets +championed +simi +anomaly +biplane +##ceptive +electrode +##para +1697 +cleavage +crossbow +swirl +informant +##lars +##osta +afi +bonfire +spec +##oux +lakeside +slump +##culus +##lais +##qvist +##rrigan +1016 +facades +borg +inwardly +cervical +xl +pointedly +050 +stabilization +##odon +chests +1699 +hacked +ctv +orthogonal +suzy +##lastic +gaulle +jacobite +rearview +##cam +##erted +ashby +##drik +##igate +##mise +##zbek +affectionately +canine +disperse +latham +##istles +##ivar +spielberg +##orin +##idium +ezekiel +cid +##sg +durga +middletown +##cina +customized +frontiers +harden +##etano +##zzy +1604 
+bolsheviks +##66 +coloration +yoko +##bedo +briefs +slabs +debra +liquidation +plumage +##oin +blossoms +dementia +subsidy +1611 +proctor +relational +jerseys +parochial +ter +##ici +esa +peshawar +cavalier +loren +cpi +idiots +shamrock +1646 +dutton +malabar +mustache +##endez +##ocytes +referencing +terminates +marche +yarmouth +##sop +acton +mated +seton +subtly +baptised +beige +extremes +jolted +kristina +telecast +##actic +safeguard +waldo +##baldi +##bular +endeavors +sloppy +subterranean +##ensburg +##itung +delicately +pigment +tq +##scu +1626 +##ound +collisions +coveted +herds +##personal +##meister +##nberger +chopra +##ricting +abnormalities +defective +galician +lucie +##dilly +alligator +likened +##genase +burundi +clears +complexion +derelict +deafening +diablo +fingered +champaign +dogg +enlist +isotope +labeling +mrna +##erre +brilliance +marvelous +##ayo +1652 +crawley +ether +footed +dwellers +deserts +hamish +rubs +warlock +skimmed +##lizer +870 +buick +embark +heraldic +irregularities +##ajan +kiara +##kulam +##ieg +antigen +kowalski +##lge +oakley +visitation +##mbit +vt +##suit +1570 +murderers +##miento +##rites +chimneys +##sling +condemn +custer +exchequer +havre +##ghi +fluctuations +##rations +dfb +hendricks +vaccines +##tarian +nietzsche +biking +juicy +##duced +brooding +scrolling +selangor +##ragan +352 +annum +boomed +seminole +sugarcane +##dna +departmental +dismissing +innsbruck +arteries +ashok +batavia +daze +kun +overtook +##rga +##tlan +beheaded +gaddafi +holm +electronically +faulty +galilee +fractures +kobayashi +##lized +gunmen +magma +aramaic +mala +eastenders +inference +messengers +bf +##qu +407 +bathrooms +##vere +1658 +flashbacks +ideally +misunderstood +##jali +##weather +mendez +##grounds +505 +uncanny +##iii +1709 +friendships +##nbc +sacrament +accommodated +reiterated +logistical +pebbles +thumped +##escence +administering +decrees +drafts +##flight +##cased +##tula +futuristic +picket +intimidation +winthrop +##fahan +interfered +339 +afar +francoise +morally +uta +cochin +croft +dwarfs +##bruck +##dents +##nami +biker +##hner +##meral +nano +##isen +##ometric +##pres +##ан +brightened +meek +parcels +securely +gunners +##jhl +##zko +agile +hysteria +##lten +##rcus +bukit +champs +chevy +cuckoo +leith +sadler +theologians +welded +##section +1663 +jj +plurality +xander +##rooms +##formed +shredded +temps +intimately +pau +tormented +##lok +##stellar +1618 +charred +ems +essen +##mmel +alarms +spraying +ascot +blooms +twinkle +##abia +##apes +internment +obsidian +##chaft +snoop +##dav +##ooping +malibu +##tension +quiver +##itia +hays +mcintosh +travers +walsall +##ffie +1623 +beverley +schwarz +plunging +structurally +m3 +rosenthal +vikram +##tsk +770 +ghz +##onda +##tiv +chalmers +groningen +pew +reckon +unicef +##rvis +55th +##gni +1651 +sulawesi +avila +cai +metaphysical +screwing +turbulence +##mberg +augusto +samba +56th +baffled +momentary +toxin +##urian +##wani +aachen +condoms +dali +steppe +##3d +##app +##oed +##year +adolescence +dauphin +electrically +inaccessible +microscopy +nikita +##ega +atv +##cel +##enter +##oles +##oteric +##ы +accountants +punishments +wrongly +bribes +adventurous +clinch +flinders +southland +##hem +##kata +gough +##ciency +lads +soared +##ה +undergoes +deformation +outlawed +rubbish +##arus +##mussen +##nidae +##rzburg +arcs +##ingdon +##tituted +1695 +wheelbase +wheeling +bombardier +campground +zebra +##lices +##oj +##bain +lullaby +##ecure +donetsk +wylie +grenada +##arding +##ης +squinting 
+eireann +opposes +##andra +maximal +runes +##broken +##cuting +##iface +##ror +##rosis +additive +britney +adultery +triggering +##drome +detrimental +aarhus +containment +jc +swapped +vichy +##ioms +madly +##oric +##rag +brant +##ckey +##trix +1560 +1612 +broughton +rustling +##stems +##uder +asbestos +mentoring +##nivorous +finley +leaps +##isan +apical +pry +slits +substitutes +##dict +intuitive +fantasia +insistent +unreasonable +##igen +##vna +domed +hannover +margot +ponder +##zziness +impromptu +jian +lc +rampage +stemming +##eft +andrey +gerais +whichever +amnesia +appropriated +anzac +clicks +modifying +ultimatum +cambrian +maids +verve +yellowstone +##mbs +conservatoire +##scribe +adherence +dinners +spectra +imperfect +mysteriously +sidekick +tatar +tuba +##aks +##ifolia +distrust +##athan +##zle +c2 +ronin +zac +##pse +celaena +instrumentalist +scents +skopje +##mbling +comical +compensated +vidal +condor +intersect +jingle +wavelengths +##urrent +mcqueen +##izzly +carp +weasel +422 +kanye +militias +postdoctoral +eugen +gunslinger +##ɛ +faux +hospice +##for +appalled +derivation +dwarves +##elis +dilapidated +##folk +astoria +philology +##lwyn +##otho +##saka +inducing +philanthropy +##bf +##itative +geek +markedly +sql +##yce +bessie +indices +rn +##flict +495 +frowns +resolving +weightlifting +tugs +cleric +contentious +1653 +mania +rms +##miya +##reate +##ruck +##tucket +bien +eels +marek +##ayton +##cence +discreet +unofficially +##ife +leaks +##bber +1705 +332 +dung +compressor +hillsborough +pandit +shillings +distal +##skin +381 +##tat +##you +nosed +##nir +mangrove +undeveloped +##idia +textures +##inho +##500 +##rise +ae +irritating +nay +amazingly +bancroft +apologetic +compassionate +kata +symphonies +##lovic +airspace +##lch +930 +gifford +precautions +fulfillment +sevilla +vulgar +martinique +##urities +looting +piccolo +tidy +##dermott +quadrant +armchair +incomes +mathematicians +stampede +nilsson +##inking +##scan +foo +quarterfinal +##ostal +shang +shouldered +squirrels +##owe +344 +vinegar +##bner +##rchy +##systems +delaying +##trics +ars +dwyer +rhapsody +sponsoring +##gration +bipolar +cinder +starters +##olio +##urst +421 +signage +##nty +aground +figurative +mons +acquaintances +duets +erroneously +soyuz +elliptic +recreated +##cultural +##quette +##ssed +##tma +##zcz +moderator +scares +##itaire +##stones +##udence +juniper +sighting +##just +##nsen +britten +calabria +ry +bop +cramer +forsyth +stillness +##л +airmen +gathers +unfit +##umber +##upt +taunting +##rip +seeker +streamlined +##bution +holster +schumann +tread +vox +##gano +##onzo +strive +dil +reforming +covent +newbury +predicting +##orro +decorate +tre +##puted +andover +ie +asahi +dept +dunkirk +gills +##tori +buren +huskies +##stis +##stov +abstracts +bets +loosen +##opa +1682 +yearning +##glio +##sir +berman +effortlessly +enamel +napoli +persist +##peration +##uez +attache +elisa +b1 +invitations +##kic +accelerating +reindeer +boardwalk +clutches +nelly +polka +starbucks +##kei +adamant +huey +lough +unbroken +adventurer +embroidery +inspecting +stanza +##ducted +naia +taluka +##pone +##roids +chases +deprivation +florian +##jing +##ppet +earthly +##lib +##ssee +colossal +foreigner +vet +freaks +patrice +rosewood +triassic +upstate +##pkins +dominates +ata +chants +ks +vo +##400 +##bley +##raya +##rmed +555 +agra +infiltrate +##ailing +##ilation +##tzer +##uppe +##werk +binoculars +enthusiast +fujian +squeak +##avs +abolitionist +almeida +boredom +hampstead +marsden +rations +##ands 
+inflated +334 +bonuses +rosalie +patna +##rco +329 +detachments +penitentiary +54th +flourishing +woolf +##dion +##etched +papyrus +##lster +##nsor +##toy +bobbed +dismounted +endelle +inhuman +motorola +tbs +wince +wreath +##ticus +hideout +inspections +sanjay +disgrace +infused +pudding +stalks +##urbed +arsenic +leases +##hyl +##rrard +collarbone +##waite +##wil +dowry +##bant +##edance +genealogical +nitrate +salamanca +scandals +thyroid +necessitated +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##` +##{ +##| +##} +##~ +##¡ +##¢ +##£ +##¤ +##¥ +##¦ +##§ +##¨ +##© +##ª +##« +##¬ +##® +##± +##´ +##µ +##¶ +##· +##º +##» +##¼ +##¾ +##¿ +##æ +##ð +##÷ +##þ +##đ +##ħ +##ŋ +##œ +##ƒ +##ɐ +##ɑ +##ɒ +##ɔ +##ɕ +##ə +##ɡ +##ɣ +##ɨ +##ɪ +##ɫ +##ɬ +##ɯ +##ɲ +##ɴ +##ɹ +##ɾ +##ʀ +##ʁ +##ʂ +##ʃ +##ʉ +##ʊ +##ʋ +##ʌ +##ʎ +##ʐ +##ʑ +##ʒ +##ʔ +##ʰ +##ʲ +##ʳ +##ʷ +##ʸ +##ʻ +##ʼ +##ʾ +##ʿ +##ˈ +##ˡ +##ˢ +##ˣ +##ˤ +##β +##γ +##δ +##ε +##ζ +##θ +##κ +##λ +##μ +##ξ +##ο +##π +##ρ +##σ +##τ +##υ +##φ +##χ +##ψ +##ω +##б +##г +##д +##ж +##з +##м +##п +##с +##у +##ф +##х +##ц +##ч +##ш +##щ +##ъ +##э +##ю +##ђ +##є +##і +##ј +##љ +##њ +##ћ +##ӏ +##ա +##բ +##գ +##դ +##ե +##թ +##ի +##լ +##կ +##հ +##մ +##յ +##ն +##ո +##պ +##ս +##վ +##տ +##ր +##ւ +##ք +##־ +##א +##ב +##ג +##ד +##ו +##ז +##ח +##ט +##י +##ך +##כ +##ל +##ם +##מ +##ן +##נ +##ס +##ע +##ף +##פ +##ץ +##צ +##ק +##ר +##ש +##ת +##، +##ء +##ب +##ت +##ث +##ج +##ح +##خ +##ذ +##ز +##س +##ش +##ص +##ض +##ط +##ظ +##ع +##غ +##ـ +##ف +##ق +##ك +##و +##ى +##ٹ +##پ +##چ +##ک +##گ +##ں +##ھ +##ہ +##ے +##अ +##आ +##उ +##ए +##क +##ख +##ग +##च +##ज +##ट +##ड +##ण +##त +##थ +##द +##ध +##न +##प +##ब +##भ +##म +##य +##र +##ल +##व +##श +##ष +##स +##ह +##ा +##ि +##ी +##ो +##। +##॥ +##ং +##অ +##আ +##ই +##উ +##এ +##ও +##ক +##খ +##গ +##চ +##ছ +##জ +##ট +##ড +##ণ +##ত +##থ +##দ +##ধ +##ন +##প +##ব +##ভ +##ম +##য +##র +##ল +##শ +##ষ +##স +##হ +##া +##ি +##ী +##ে +##க +##ச +##ட +##த +##ந +##ன +##ப +##ம +##ய +##ர +##ல +##ள +##வ +##ா +##ி +##ு +##ே +##ை +##ನ +##ರ +##ಾ +##ක +##ය +##ර +##ල +##ව +##ා +##ก +##ง +##ต +##ท +##น +##พ +##ม +##ย +##ร +##ล +##ว +##ส +##อ +##า +##เ +##་ +##། +##ག +##ང +##ད +##ན +##པ +##བ +##མ +##འ +##ར +##ལ +##ས +##မ +##ა +##ბ +##გ +##დ +##ე +##ვ +##თ +##ი +##კ +##ლ +##მ +##ნ +##ო +##რ +##ს +##ტ +##უ +##ᄀ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄉ +##ᄊ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅡ +##ᅢ +##ᅥ +##ᅦ +##ᅧ +##ᅩ +##ᅪ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᅵ +##ᆨ +##ᆫ +##ᆯ +##ᆷ +##ᆸ +##ᆼ +##ᴬ +##ᴮ +##ᴰ +##ᴵ +##ᴺ +##ᵀ +##ᵃ +##ᵇ +##ᵈ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵖ +##ᵗ +##ᵘ +##ᵣ +##ᵤ +##ᵥ +##ᶜ +##ᶠ +##‐ +##‑ +##‒ +##– +##— +##― +##‖ +##‘ +##’ +##‚ +##“ +##” +##„ +##† +##‡ +##• +##… +##‰ +##′ +##″ +##› +##‿ +##⁄ +##⁰ +##ⁱ +##⁴ +##⁵ +##⁶ +##⁷ +##⁸ +##⁹ +##⁻ +##ⁿ +##₅ +##₆ +##₇ +##₈ +##₉ +##₊ +##₍ +##₎ +##ₐ +##ₑ +##ₒ +##ₓ +##ₕ +##ₖ +##ₗ +##ₘ +##ₚ +##ₛ +##ₜ +##₤ +##₩ +##€ +##₱ +##₹ +##ℓ +##№ +##ℝ +##™ +##⅓ +##⅔ +##← +##↑ +##→ +##↓ +##↔ +##↦ +##⇄ +##⇌ +##⇒ +##∂ +##∅ +##∆ +##∇ +##∈ +##∗ +##∘ +##√ +##∞ +##∧ +##∨ +##∩ +##∪ +##≈ +##≡ +##≤ +##≥ +##⊂ +##⊆ +##⊕ +##⊗ +##⋅ +##─ +##│ +##■ +##▪ +##● +##★ +##☆ +##☉ +##♠ +##♣ +##♥ +##♦ +##♯ +##⟨ +##⟩ +##ⱼ +##⺩ +##⺼ +##⽥ +##、 +##。 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##〜 +##あ +##い +##う +##え +##お +##か +##き +##く +##け +##こ +##さ +##し +##す +##せ +##そ +##た +##ち +##っ +##つ +##て +##と +##な +##に +##ぬ +##ね +##の +##は +##ひ +##ふ +##へ +##ほ +##ま +##み +##む +##め +##も +##や +##ゆ +##よ +##ら +##り +##る +##れ +##ろ +##を +##ん +##ァ +##ア +##ィ +##イ +##ウ +##ェ +##エ +##オ +##カ +##キ +##ク +##ケ 
+##コ +##サ +##シ +##ス +##セ +##タ +##チ +##ッ +##ツ +##テ +##ト +##ナ +##ニ +##ノ +##ハ +##ヒ +##フ +##ヘ +##ホ +##マ +##ミ +##ム +##メ +##モ +##ャ +##ュ +##ョ +##ラ +##リ +##ル +##レ +##ロ +##ワ +##ン +##・ +##ー +##一 +##三 +##上 +##下 +##不 +##世 +##中 +##主 +##久 +##之 +##也 +##事 +##二 +##五 +##井 +##京 +##人 +##亻 +##仁 +##介 +##代 +##仮 +##伊 +##会 +##佐 +##侍 +##保 +##信 +##健 +##元 +##光 +##八 +##公 +##内 +##出 +##分 +##前 +##劉 +##力 +##加 +##勝 +##北 +##区 +##十 +##千 +##南 +##博 +##原 +##口 +##古 +##史 +##司 +##合 +##吉 +##同 +##名 +##和 +##囗 +##四 +##国 +##國 +##土 +##地 +##坂 +##城 +##堂 +##場 +##士 +##夏 +##外 +##大 +##天 +##太 +##夫 +##奈 +##女 +##子 +##学 +##宀 +##宇 +##安 +##宗 +##定 +##宣 +##宮 +##家 +##宿 +##寺 +##將 +##小 +##尚 +##山 +##岡 +##島 +##崎 +##川 +##州 +##巿 +##帝 +##平 +##年 +##幸 +##广 +##弘 +##張 +##彳 +##後 +##御 +##德 +##心 +##忄 +##志 +##忠 +##愛 +##成 +##我 +##戦 +##戸 +##手 +##扌 +##政 +##文 +##新 +##方 +##日 +##明 +##星 +##春 +##昭 +##智 +##曲 +##書 +##月 +##有 +##朝 +##木 +##本 +##李 +##村 +##東 +##松 +##林 +##森 +##楊 +##樹 +##橋 +##歌 +##止 +##正 +##武 +##比 +##氏 +##民 +##水 +##氵 +##氷 +##永 +##江 +##沢 +##河 +##治 +##法 +##海 +##清 +##漢 +##瀬 +##火 +##版 +##犬 +##王 +##生 +##田 +##男 +##疒 +##発 +##白 +##的 +##皇 +##目 +##相 +##省 +##真 +##石 +##示 +##社 +##神 +##福 +##禾 +##秀 +##秋 +##空 +##立 +##章 +##竹 +##糹 +##美 +##義 +##耳 +##良 +##艹 +##花 +##英 +##華 +##葉 +##藤 +##行 +##街 +##西 +##見 +##訁 +##語 +##谷 +##貝 +##貴 +##車 +##軍 +##辶 +##道 +##郎 +##郡 +##部 +##都 +##里 +##野 +##金 +##鈴 +##镇 +##長 +##門 +##間 +##阝 +##阿 +##陳 +##陽 +##雄 +##青 +##面 +##風 +##食 +##香 +##馬 +##高 +##龍 +##龸 +##fi +##fl +##! +##( +##) +##, +##- +##. +##/ +##: +##? +##~ diff --git a/anet_clip/backup/pdvc/modules/cross-base/cross_config.json b/anet_clip/backup/pdvc/modules/cross-base/cross_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a4807695d56a3aea97a55a9db97ba753e960748 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/cross-base/cross_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 1024, + "num_attention_heads": 12, + "num_hidden_layers": 2, + "vocab_size": 768 +} \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/decoder-base/decoder_config.json b/anet_clip/backup/pdvc/modules/decoder-base/decoder_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91c46b63eba081afb28085a6d53f390ada5a5cfe --- /dev/null +++ b/anet_clip/backup/pdvc/modules/decoder-base/decoder_config.json @@ -0,0 +1,14 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522, + "num_decoder_layers": 1, + "max_target_embeddings": 512 +} diff --git a/anet_clip/backup/pdvc/modules/file_utils.py b/anet_clip/backup/pdvc/modules/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..43fa8ca87e20ee5333dd84a09795a743bbf3f183 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/file_utils.py @@ -0,0 +1,239 @@ +""" +Utilities for working with the local dataset cache. +This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp +Copyright by the AllenNLP authors. 
+""" + +import os +import logging +import shutil +import tempfile +import json +from urllib.parse import urlparse +from pathlib import Path +from typing import Optional, Tuple, Union, IO, Callable, Set +from hashlib import sha256 +from functools import wraps + +from tqdm import tqdm + +import boto3 +from botocore.exceptions import ClientError +import requests + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + + +def filename_to_url(filename: str, cache_dir: Union[str, Path] = None) -> Tuple[str, str]: + """ + Return the url and etag (which may be ``None``) stored for `filename`. + Raise ``FileNotFoundError`` if `filename` or its stored metadata do not exist. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + cache_path = os.path.join(cache_dir, filename) + if not os.path.exists(cache_path): + raise FileNotFoundError("file {} not found".format(cache_path)) + + meta_path = cache_path + '.json' + if not os.path.exists(meta_path): + raise FileNotFoundError("file {} not found".format(meta_path)) + + with open(meta_path) as meta_file: + metadata = json.load(meta_file) + url = metadata['url'] + etag = metadata['etag'] + + return url, etag + + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. 
+ """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. + with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + + +def read_set_from_file(filename: str) -> Set[str]: + ''' + Extract a de-duped collection (set) of text from a file. + Expected file format is one item per line. 
+    '''
+    collection = set()
+    with open(filename, 'r', encoding='utf-8') as file_:
+        for line in file_:
+            collection.add(line.rstrip())
+    return collection
+
+
+def get_file_extension(path: str, dot=True, lower: bool = True):
+    ext = os.path.splitext(path)[1]
+    ext = ext if dot else ext[1:]
+    return ext.lower() if lower else ext
diff --git a/anet_clip/backup/pdvc/modules/modeling.py b/anet_clip/backup/pdvc/modules/modeling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9551b488c16d04fad65dcdaeba7d73d7740f2902
--- /dev/null
+++ b/anet_clip/backup/pdvc/modules/modeling.py
@@ -0,0 +1,429 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn import CrossEntropyLoss, MSELoss
+
+from pdvc.modules.until_module import PreTrainedModel, LayerNorm, CrossEn, MILNCELoss, MaxMarginRankingLoss
+from pdvc.modules.module_bert import BertModel, BertConfig, BertOnlyMLMHead
+from pdvc.modules.module_visual import VisualModel, VisualConfig, VisualOnlyMLMHead
+from pdvc.modules.module_cross import CrossModel, CrossConfig
+from pdvc.modules.module_decoder import DecoderModel, DecoderConfig
+
+logger = logging.getLogger(__name__)
+
+
+class UniVLPreTrainedModel(PreTrainedModel, nn.Module):
+    """ An abstract class to handle weights initialization and
+        a simple interface for downloading and loading pretrained models.
+ """ + def __init__(self, bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs): + # utilize bert config as base config + super(UniVLPreTrainedModel, self).__init__(bert_config) + self.bert_config = bert_config + self.visual_config = visual_config + self.cross_config = cross_config + self.decoder_config = decoder_config + + self.bert = None + self.visual = None + self.cross = None + self.decoder = None + + @classmethod + def from_pretrained(cls, pretrained_bert_name, visual_model_name, cross_model_name, decoder_model_name, + state_dict=None, cache_dir=None, type_vocab_size=2, *inputs, **kwargs): + + task_config = None + if "task_config" in kwargs.keys(): + task_config = kwargs["task_config"] + if not hasattr(task_config, "local_rank"): + task_config.__dict__["local_rank"] = 0 + elif task_config.local_rank == -1: + task_config.local_rank = 0 + print(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config) + bert_config, state_dict = BertConfig.get_config(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config=task_config) + visual_config, _ = VisualConfig.get_config(visual_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + cross_config, _ = CrossConfig.get_config(cross_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + decoder_config, _ = DecoderConfig.get_config(decoder_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + + model = cls(bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs) + + assert model.bert is not None + assert model.visual is not None + + if state_dict is not None: + model = cls.init_preweight(model, state_dict, task_config=task_config) + + return model + +class NormalizeVideo(nn.Module): + def __init__(self, task_config): + super(NormalizeVideo, self).__init__() + self.visual_norm2d = LayerNorm(task_config.video_dim) + + def forward(self, video): + video = torch.as_tensor(video).float() + video = video.view(-1, video.shape[-2], video.shape[-1]) + video = self.visual_norm2d(video) + return video + +def show_log(task_config, info): + if task_config is None or task_config.local_rank == 0: + logger.warning(info) + +def update_attr(target_name, target_config, target_attr_name, source_config, source_attr_name, default_value=None): + if hasattr(source_config, source_attr_name): + if default_value is None or getattr(source_config, source_attr_name) != default_value: + setattr(target_config, target_attr_name, getattr(source_config, source_attr_name)) + show_log(source_config, "Set {}.{}: {}.".format(target_name, + target_attr_name, getattr(target_config, target_attr_name))) + return target_config + +def check_attr(target_name, task_config): + return hasattr(task_config, target_name) and task_config.__dict__[target_name] + +class UniVL(UniVLPreTrainedModel): + def __init__(self, bert_config, visual_config, cross_config, decoder_config, task_config): + super(UniVL, self).__init__(bert_config, visual_config, cross_config, decoder_config) + self.task_config = task_config + self.ignore_video_index = -1 + + assert self.task_config.max_words <= bert_config.max_position_embeddings + assert self.task_config.max_words <= decoder_config.max_target_embeddings + assert self.task_config.max_frames <= visual_config.max_position_embeddings + assert self.task_config.max_words + self.task_config.max_frames <= cross_config.max_position_embeddings + + self._stage_one = True + self._stage_two = False + + if 
check_attr('stage_two', self.task_config): + self._stage_one = False + self._stage_two = self.task_config.stage_two + show_log(task_config, "Stage-One:{}, Stage-Two:{}".format(self._stage_one, self._stage_two)) + + self.train_sim_after_cross = False + if self._stage_one and check_attr('train_sim_after_cross', self.task_config): + self.train_sim_after_cross = True + show_log(task_config, "Test retrieval after cross encoder.") + + # Text Encoder ===> + bert_config = update_attr("bert_config", bert_config, "num_hidden_layers", + self.task_config, "text_num_hidden_layers") + # print('=================The bert config:==========/n',bert_config) + # print('=================The task config:==========/n',self.task_config) + self.bert = BertModel(bert_config) + bert_word_embeddings_weight = self.bert.embeddings.word_embeddings.weight + bert_position_embeddings_weight = self.bert.embeddings.position_embeddings.weight + # <=== End of Text Encoder + + # Video Encoder ===> + visual_config = update_attr("visual_config", visual_config, "num_hidden_layers", + self.task_config, "visual_num_hidden_layers") + self.visual = VisualModel(visual_config) + visual_word_embeddings_weight = self.visual.embeddings.word_embeddings.weight + # <=== End of Video Encoder + + if self._stage_one is False or self.train_sim_after_cross: + # Cross Encoder ===> + cross_config = update_attr("cross_config", cross_config, "num_hidden_layers", + self.task_config, "cross_num_hidden_layers") + self.cross = CrossModel(cross_config) + # <=== End of Cross Encoder + + if self.train_sim_after_cross is False: + # Decoder ===> + decoder_config = update_attr("decoder_config", decoder_config, "num_decoder_layers", + self.task_config, "decoder_num_hidden_layers") + self.decoder = DecoderModel(decoder_config, bert_word_embeddings_weight, bert_position_embeddings_weight) + # <=== End of Decoder + + if self.task_config.do_pretrain: + self.cls = BertOnlyMLMHead(bert_config, bert_word_embeddings_weight) + self.cls_visual = VisualOnlyMLMHead(visual_config, visual_word_embeddings_weight) + self.alm_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.similarity_dense = nn.Linear(bert_config.hidden_size, 1) + self.decoder_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.normalize_video = NormalizeVideo(task_config) + + mILNCELoss = MILNCELoss(batch_size=task_config.batch_size // task_config.n_gpu, n_pair=task_config.n_pair, ) + maxMarginRankingLoss = MaxMarginRankingLoss(margin=task_config.margin, + negative_weighting=task_config.negative_weighting, + batch_size=task_config.batch_size // task_config.n_gpu, + n_pair=task_config.n_pair, + hard_negative_rate=task_config.hard_negative_rate, ) + + if task_config.use_mil: + self.loss_fct = CrossEn() if self._stage_two else mILNCELoss + self._pretrain_sim_loss_fct = mILNCELoss + else: + self.loss_fct = CrossEn() if self._stage_two else maxMarginRankingLoss + self._pretrain_sim_loss_fct = maxMarginRankingLoss + + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids, attention_mask, video, video_mask=None, + pairs_masked_text=None, pairs_token_labels=None, masked_video=None, video_labels_index=None, + input_caption_ids=None, decoder_mask=None, output_caption_ids=None): + + input_ids = input_ids.view(-1, input_ids.shape[-1]) + token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + video = self.normalize_video(video) + + if input_caption_ids is 
not None: + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + sequence_output, visual_output = self.get_sequence_visual_output(input_ids, token_type_ids, attention_mask, + video, video_mask, shaped=True) + + if self.training: + loss = 0. + if self._stage_one: + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, + video_mask, shaped=True) + sim_loss = self.loss_fct(sim_matrix) + loss += sim_loss + + if self._stage_two: + if self.task_config.do_pretrain: + pairs_masked_text = pairs_masked_text.view(-1, pairs_masked_text.shape[-1]) + pairs_token_labels = pairs_token_labels.view(-1, pairs_token_labels.shape[-1]) + + masked_video = self.normalize_video(masked_video) + video_labels_index = video_labels_index.view(-1, video_labels_index.shape[-1]) + + sequence_output_alm, visual_output_alm = self.get_sequence_visual_output(pairs_masked_text, token_type_ids, + attention_mask, masked_video, video_mask, shaped=True) + + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output_alm, visual_output_alm, attention_mask, video_mask) + sequence_cross_output, visual_cross_output = torch.split(cross_output, [attention_mask.size(-1), video_mask.size(-1)], dim=1) + + alm_loss = self._calculate_mlm_loss(sequence_cross_output, pairs_token_labels) + loss += alm_loss + + nce_loss = self._calculate_mfm_loss(visual_cross_output, video, video_mask, video_labels_index) + loss += nce_loss + + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, video_mask, + shaped=True, _pretrain_joint=True) + sim_loss_joint = self._pretrain_sim_loss_fct(sim_matrix) + loss += sim_loss_joint + + if (input_caption_ids is not None) and \ + (self.task_config.do_pretrain + or (self.task_config.do_pretrain is False and self.task_config.task_type == "caption")): + if self.task_config.do_pretrain: + decoder_scores, res_tuples = self._get_decoder_score(sequence_output_alm, visual_output_alm, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + elif self.task_config.task_type == "caption": + decoder_scores, res_tuples = self._get_decoder_score(sequence_output, visual_output, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + else: + raise NotImplementedError + + output_caption_ids = output_caption_ids.view(-1, output_caption_ids.shape[-1]) + decoder_loss = self.decoder_loss_fct(decoder_scores.view(-1, self.bert_config.vocab_size), output_caption_ids.view(-1)) + loss += decoder_loss + + if self.task_config.do_pretrain or self.task_config.task_type == "retrieval": + if self.task_config.do_pretrain: + sim_matrix_text_visual = self.get_similarity_logits(sequence_output_alm, visual_output_alm, + attention_mask, video_mask, shaped=True) + elif self.task_config.task_type == "retrieval": + sim_matrix_text_visual = self.get_similarity_logits(sequence_output, visual_output, + attention_mask, video_mask, shaped=True) + else: + raise NotImplementedError + + sim_loss_text_visual = self.loss_fct(sim_matrix_text_visual) + loss += sim_loss_text_visual + + return loss + else: + return None + + def _calculate_mlm_loss(self, sequence_output_alm, pairs_token_labels): + alm_scores = self.cls(sequence_output_alm) + alm_loss = self.alm_loss_fct(alm_scores.view(-1, self.bert_config.vocab_size), pairs_token_labels.view(-1)) + return alm_loss + + def _calculate_mfm_loss(self, visual_output_alm, video, 
video_mask, video_labels_index): + afm_scores = self.cls_visual(visual_output_alm) + afm_scores_tr = afm_scores.view(-1, afm_scores.shape[-1]) + + video_tr = video.permute(2, 0, 1) + video_tr = video_tr.view(video_tr.shape[0], -1) + + logits_matrix = torch.mm(afm_scores_tr, video_tr) + video_mask_float = video_mask.to(dtype=torch.float) + mask_matrix = torch.mm(video_mask_float.view(-1, 1), video_mask_float.view(1, -1)) + masked_logits = logits_matrix + (1. - mask_matrix) * -1e8 + + logpt = F.log_softmax(masked_logits, dim=-1) + logpt = torch.diag(logpt) + nce_loss = -logpt + + video_labels_index_mask = (video_labels_index != self.ignore_video_index) + nce_loss = nce_loss.masked_select(video_labels_index_mask.view(-1)) + nce_loss = nce_loss.mean() + return nce_loss + + def get_sequence_visual_output(self, input_ids, token_type_ids, attention_mask, video, video_mask, shaped=False): + if shaped is False: + input_ids = input_ids.view(-1, input_ids.shape[-1]) + token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + video = self.normalize_video(video) + encoded_layers, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True) + sequence_output = encoded_layers[-1] + + visual_layers, _ = self.visual(video, video_mask, output_all_encoded_layers=True) + visual_output = visual_layers[-1] + + return sequence_output, visual_output + + def _get_cross_output(self, sequence_output, visual_output, attention_mask, video_mask): + concat_features = torch.cat((sequence_output, visual_output), dim=1) # concatenate tokens and frames + concat_mask = torch.cat((attention_mask, video_mask), dim=1) + text_type_ = torch.zeros_like(attention_mask) + video_type_ = torch.ones_like(video_mask) + concat_type = torch.cat((text_type_, video_type_), dim=1) + + cross_layers, pooled_output = self.cross(concat_features, concat_type, concat_mask, output_all_encoded_layers=True) + cross_output = cross_layers[-1] + + return cross_output, pooled_output, concat_mask + + def _mean_pooling_for_similarity(self, sequence_output, visual_output, attention_mask, video_mask): + attention_mask_un = attention_mask.to(dtype=torch.float).unsqueeze(-1) + attention_mask_un[:, 0, :] = 0. + sequence_output = sequence_output * attention_mask_un + text_out = torch.sum(sequence_output, dim=1) / torch.sum(attention_mask_un, dim=1, dtype=torch.float) + + video_mask_un = video_mask.to(dtype=torch.float).unsqueeze(-1) + visual_output = visual_output * video_mask_un + video_mask_un_sum = torch.sum(video_mask_un, dim=1, dtype=torch.float) + video_mask_un_sum[video_mask_un_sum == 0.] = 1.
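+ # clamping zero-sum mask rows to 1 above avoids a divide-by-zero for fully padded videos; their masked visual_output rows are all-zeros, so the pooled vector stays zero either way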
+ video_out = torch.sum(visual_output, dim=1) / video_mask_un_sum + + return text_out, video_out + + def _cross_similarity(self, sequence_output, visual_output, attention_mask, video_mask): + b_text, s_text, h_text = sequence_output.size() + b_visual, s_visual, h_visual = visual_output.size() + + retrieve_logits_list = [] + step_size = 5 + + split_size = [step_size] * (b_text // step_size) + release_size = b_text - sum(split_size) + if release_size > 0: + split_size += [release_size] + + sequence_output_splits = torch.split(sequence_output, split_size, dim=0) + attention_mask_splits = torch.split(attention_mask, split_size, dim=0) + for i in range(len(split_size)): + sequence_output_row = sequence_output_splits[i] + attention_mask_row = attention_mask_splits[i] + sequence_output_l = sequence_output_row.unsqueeze(1).repeat(1, b_visual, 1, 1) + sequence_output_l = sequence_output_l.view(-1, s_text, h_text) + attention_mask_l = attention_mask_row.unsqueeze(1).repeat(1, b_visual, 1) + attention_mask_l = attention_mask_l.view(-1, s_text) + + step_truth = sequence_output_row.size(0) + visual_output_r = visual_output.unsqueeze(0).repeat(step_truth, 1, 1, 1) + visual_output_r = visual_output_r.view(-1, s_visual, h_visual) + video_mask_r = video_mask.unsqueeze(0).repeat(step_truth, 1, 1) + video_mask_r = video_mask_r.view(-1, s_visual) + + cross_output, pooled_output, concat_mask = \ + self._get_cross_output(sequence_output_l, visual_output_r, attention_mask_l, video_mask_r) + retrieve_logits_row = self.similarity_dense(pooled_output).squeeze(-1).view(step_truth, b_visual) + + retrieve_logits_list.append(retrieve_logits_row) + retrieve_logits = torch.cat(retrieve_logits_list, dim=0) + return retrieve_logits + + def get_similarity_logits(self, sequence_output, visual_output, attention_mask, video_mask, shaped=False, _pretrain_joint=False): + if shaped is False: + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + if (self._stage_two and _pretrain_joint is False) or self.train_sim_after_cross: + retrieve_logits = self._cross_similarity(sequence_output, visual_output, attention_mask, video_mask) + else: + text_out, video_out = self._mean_pooling_for_similarity(sequence_output, visual_output, attention_mask, video_mask) + if self.task_config.use_mil is False: + text_out = F.normalize(text_out, dim=-1) + video_out = F.normalize(video_out, dim=-1) + retrieve_logits = torch.matmul(text_out, video_out.t()) + + return retrieve_logits + + def _get_decoder_score(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, shaped=False): + + if shaped is False: + input_ids = input_ids.view(-1, input_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + res_tuples = () + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output, visual_output, attention_mask, video_mask) + decoder_scores = self.decoder(input_caption_ids, encoder_outs=cross_output, answer_mask=decoder_mask, encoder_mask=concat_mask) + + return decoder_scores, res_tuples + + def decoder_caption(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, + shaped=False, get_logits=False): + if shaped is False: + input_ids 
= input_ids.view(-1, input_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + decoder_scores, _ = self._get_decoder_score(sequence_output, visual_output, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + + if get_logits: + return decoder_scores + + _, decoder_scores_result = torch.max(decoder_scores, -1) + + return decoder_scores_result \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/module_bert.py b/anet_clip/backup/pdvc/modules/module_bert.py new file mode 100644 index 0000000000000000000000000000000000000000..aa376657fdf271f11978379665a67897c2cc5943 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_bert.py @@ -0,0 +1,447 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz", +} + +CONFIG_NAME = 'bert_config.json' +WEIGHTS_NAME = 'pytorch_model.bin' + + +class BertConfig(PretrainedConfig): + """Configuration class to store the configuration of a `BertModel`. 
+ """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Encoder Representations from Transformers"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary (see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1].
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controlled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated with the first token of the + input (`[CLS]`) to train on the Next-Sentence task (see BERT's paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + def __init__(self, config): + super(BertModel, self).__init__(config) + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_weights) + + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is simpler than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely.
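+ # e.g. an attention_mask row [1, 1, 0] becomes [0.0, 0.0, -10000.0] after the two lines below, + # so padded key positions receive ~zero probability from every softmax in the encoder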
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/module_cross.py b/anet_clip/backup/pdvc/modules/module_cross.py new file mode 100644 index 0000000000000000000000000000000000000000..8ff41910a2c62e1c79ab3f843bef3c54171bb026 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_cross.py @@ -0,0 +1,394 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'cross_config.json' +WEIGHTS_NAME = 'cross_pytorch_model.bin' + + +class CrossConfig(PretrainedConfig): + """Configuration class to store the configuration of a `CrossModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs CrossConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CrossModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `CrossModel`. + initializer_range: The standard deviation of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int) " + "or the path to a pretrained model config file (str)") + + +class CrossEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. + """ + def __init__(self, config): + super(CrossEmbeddings, self).__init__() + + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, concat_embeddings, concat_type=None): + + batch_size, seq_length = concat_embeddings.size(0), concat_embeddings.size(1) + if concat_type is None: + concat_type = torch.zeros(batch_size, seq_length, dtype=torch.long, device=concat_embeddings.device) + + position_ids = torch.arange(seq_length, dtype=torch.long, device=concat_embeddings.device) + position_ids = position_ids.unsqueeze(0).expand(concat_embeddings.size(0), -1) + + token_type_embeddings = self.token_type_embeddings(concat_type) + position_embeddings = self.position_embeddings(position_ids) + + embeddings = concat_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class CrossSelfAttention(nn.Module): + def __init__(self, config): + super(CrossSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout
= nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in CrossModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class CrossSelfOutput(nn.Module): + def __init__(self, config): + super(CrossSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossAttention(nn.Module): + def __init__(self, config): + super(CrossAttention, self).__init__() + self.self = CrossSelfAttention(config) + self.output = CrossSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class CrossIntermediate(nn.Module): + def __init__(self, config): + super(CrossIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class CrossOutput(nn.Module): + def __init__(self, config): + super(CrossOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossLayer(nn.Module): + def __init__(self, config): 
+ super(CrossLayer, self).__init__() + self.attention = CrossAttention(config) + self.intermediate = CrossIntermediate(config) + self.output = CrossOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class CrossEncoder(nn.Module): + def __init__(self, config): + super(CrossEncoder, self).__init__() + layer = CrossLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class CrossPooler(nn.Module): + def __init__(self, config): + super(CrossPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class CrossPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(CrossPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class CrossLMPredictionHead(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossLMPredictionHead, self).__init__() + self.transform = CrossPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
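+ # nn.Linear(in=hidden_size, out=vocab_size) stores its weight as (vocab_size, hidden_size), the same shape as the embedding table, so the weights can be tied by direct assignment below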
+ self.decoder = nn.Linear(cross_model_embedding_weights.size(1), + cross_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = cross_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(cross_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class CrossOnlyMLMHead(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossOnlyMLMHead, self).__init__() + self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class CrossOnlyNSPHead(nn.Module): + def __init__(self, config): + super(CrossOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class CrossPreTrainingHeads(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossPreTrainingHeads, self).__init__() + self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class CrossModel(PreTrainedModel): + def __init__(self, config): + super(CrossModel, self).__init__(config) + self.embeddings = CrossEmbeddings(config) + self.encoder = CrossEncoder(config) + self.pooler = CrossPooler(config) + self.apply(self.init_weights) + + def forward(self, concat_input, concat_type=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones(concat_input.size(0), concat_input.size(1)) + if concat_type is None: + concat_type = torch.zeros_like(attention_mask) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
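+ # same additive-mask construction as in BertModel.forward in module_bert.py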
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(concat_input, concat_type) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output diff --git a/anet_clip/backup/pdvc/modules/module_decoder.py b/anet_clip/backup/pdvc/modules/module_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..25622d1e4c0e9a0d19fe2b4986f7267ba1526823 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_decoder.py @@ -0,0 +1,406 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil +import numpy as np + +import torch +from torch import nn +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'decoder_config.json' +WEIGHTS_NAME = 'decoder_pytorch_model.bin' + + +class DecoderConfig(PretrainedConfig): + """Configuration class to store the configuration of a `DecoderModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + type_vocab_size=2, + initializer_range=0.02, + max_target_embeddings=128, + num_decoder_layers=1): + """Constructs DecoderConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `DecoderModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `DecoderModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + max_target_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + num_decoder_layers: + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.max_target_embeddings = max_target_embeddings + self.num_decoder_layers = num_decoder_layers + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + 
return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(decoder_model_embedding_weights.size(1), + decoder_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = decoder_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(decoder_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, decoder_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + +class MultiHeadAttention(nn.Module): + ''' Multi-Head Attention module ''' + + def __init__(self, config): + super(MultiHeadAttention, self).__init__() + + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, q, k, v, attention_mask): + mixed_query_layer = self.query(q) + mixed_key_layer = self.key(k) + mixed_value_layer = self.value(v) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
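+ # unlike the self-attention modules above, this variant also returns the masked pre-softmax attention_scores, which DecoderLayer surfaces as dec_att_scores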
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + + return context_layer, attention_scores + +class PositionwiseFeedForward(nn.Module): + ''' A two-feed-forward-layer module ''' + + def __init__(self, d_in, d_hid, dropout=0.1): + super().__init__() + self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise + self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise + self.layer_norm = nn.LayerNorm(d_in) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + residual = x + output = x.transpose(1, 2) + output = self.w_2(ACT2FN["gelu"](self.w_1(output))) + output = output.transpose(1, 2) + output = self.dropout(output) + output = self.layer_norm(output + residual) + return output + +class DecoderAttention(nn.Module): + def __init__(self, config): + super(DecoderAttention, self).__init__() + self.att = MultiHeadAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, q, k, v, attention_mask): + att_output, attention_probs = self.att(q, k, v, attention_mask) + attention_output = self.output(att_output, q) + return attention_output, attention_probs + +class DecoderLayer(nn.Module): + def __init__(self, config): + super(DecoderLayer, self).__init__() + self.slf_attn = DecoderAttention(config) + self.enc_attn = DecoderAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None): + slf_output, _ = self.slf_attn(dec_input, dec_input, dec_input, slf_attn_mask) + dec_output, dec_att_scores = self.enc_attn(slf_output, enc_output, enc_output, dec_enc_attn_mask) + intermediate_output = self.intermediate(dec_output) + dec_output = self.output(intermediate_output, dec_output) + return dec_output, dec_att_scores + +class DecoderEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_target_embeddings, config.hidden_size) + self.word_embeddings.weight = decoder_word_embeddings_weight + self.position_embeddings.weight = decoder_position_embeddings_weight + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + + embeddings = words_embeddings + position_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class Decoder(nn.Module): + def __init__(self, config): + super(Decoder, self).__init__() + layer = DecoderLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_decoder_layers)]) + + def forward(self, hidden_states, encoder_outs, self_attn_mask, attention_mask, output_all_encoded_layers=False): + dec_att_scores = None + all_encoder_layers = [] + all_dec_att_probs = [] + for layer_module in self.layer: + hidden_states, dec_att_scores = layer_module(hidden_states, encoder_outs, self_attn_mask, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + return all_encoder_layers, all_dec_att_probs + +class DecoderClassifier(nn.Module): + def __init__(self, config, embedding_weights): + super(DecoderClassifier, self).__init__() + self.cls = BertOnlyMLMHead(config, embedding_weights) + + def forward(self, hidden_states): + cls_scores = self.cls(hidden_states) + return cls_scores + +class DecoderModel(PreTrainedModel): + + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + final_norm (bool, optional): apply layer norm to the output of the + final decoder layer (default: True). 
+ """ + + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderModel, self).__init__(config) + self.config = config + self.max_target_length = config.max_target_embeddings + self.embeddings = DecoderEmbeddings(config, decoder_word_embeddings_weight, decoder_position_embeddings_weight) + self.decoder = Decoder(config) + self.classifier = DecoderClassifier(config, decoder_word_embeddings_weight) + self.apply(self.init_weights) + + def forward(self, input_ids, encoder_outs=None, answer_mask=None, encoder_mask=None): + """ + Args: + input_ids (LongTensor): previous decoder outputs of shape `(batch, tgt_len)`, for input feeding/teacher forcing + encoder_outs (Tensor, optional): output from the encoder, used for encoder-side attention + + Returns: + tuple: + - the last decoder layer's output of shape `(batch, tgt_len, vocab)` + - the last decoder layer's attention weights of shape `(batch, tgt_len, src_len)` + """ + embedding_output = self.embeddings(input_ids) + + extended_encoder_mask = encoder_mask.unsqueeze(1).unsqueeze(2) # b x 1 x 1 x ls + extended_encoder_mask = extended_encoder_mask.to(dtype=self.dtype) # fp16 compatibility + extended_encoder_mask = (1.0 - extended_encoder_mask) * -10000.0 + + extended_answer_mask = answer_mask.unsqueeze(1).unsqueeze(2) + extended_answer_mask = extended_answer_mask.to(dtype=self.dtype) # fp16 compatibility + + sz_b, len_s, _ = embedding_output.size() + subsequent_mask = torch.triu(torch.ones((len_s, len_s), device=embedding_output.device, dtype=embedding_output.dtype), diagonal=1) + self_attn_mask = subsequent_mask.unsqueeze(0).expand(sz_b, -1, -1).unsqueeze(1) # b x 1 x ls x ls + slf_attn_mask = ((1.0 - extended_answer_mask) + self_attn_mask).gt(0).to(dtype=self.dtype) + self_attn_mask = slf_attn_mask * -10000.0 + + decoded_layers, dec_att_scores = self.decoder(embedding_output, + encoder_outs, + self_attn_mask, + extended_encoder_mask, + ) + sequence_output = decoded_layers[-1] + cls_scores = self.classifier(sequence_output) + + return cls_scores diff --git a/anet_clip/backup/pdvc/modules/module_visual.py b/anet_clip/backup/pdvc/modules/module_visual.py new file mode 100644 index 0000000000000000000000000000000000000000..b9a43f8a74c1e5e020c8b4daec33d7adb5d3b840 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/module_visual.py @@ -0,0 +1,425 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'visual_config.json' +WEIGHTS_NAME = 'visual_pytorch_model.bin' + + +class VisualConfig(PretrainedConfig): + """Configuration class to store the configuration of a `VisualModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file=4096, + hidden_size=768, + num_hidden_layers=3, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + initializer_range=0.02): + """Constructs VisualConfig. + + Args: + vocab_size_or_config_json_file: Size of the encoder layers and the pooler layer. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class VisualEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(VisualEmbeddings, self).__init__() + + self.word_embeddings = nn.Linear(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_embeddings): + seq_length = input_embeddings.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_embeddings.device) + position_ids = position_ids.unsqueeze(0).expand(input_embeddings.size(0), -1) + + words_embeddings = self.word_embeddings(input_embeddings) + # words_embeddings = self.transform_act_fn(words_embeddings) + + position_embeddings = self.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class VisualSelfAttention(nn.Module): + def __init__(self, config): + super(VisualSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in VisualModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class VisualSelfOutput(nn.Module): + def __init__(self, config): + super(VisualSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualAttention(nn.Module): + def __init__(self, config): + super(VisualAttention, self).__init__() + self.self = VisualSelfAttention(config) + self.output = VisualSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class VisualIntermediate(nn.Module): + def __init__(self, config): + super(VisualIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class VisualOutput(nn.Module): + def __init__(self, config): + super(VisualOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualLayer(nn.Module): + def __init__(self, config): + super(VisualLayer, self).__init__() + self.attention = VisualAttention(config) + self.intermediate = VisualIntermediate(config) + self.output = VisualOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class VisualEncoder(nn.Module): + def __init__(self, config): + super(VisualEncoder, self).__init__() + layer = VisualLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class VisualPooler(nn.Module): + def __init__(self, config): + super(VisualPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + 
self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class VisualPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(VisualPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class VisualLMPredictionHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualLMPredictionHead, self).__init__() + self.transform = VisualPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.weight = visual_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(visual_model_embedding_weights.size(1))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = hidden_states.matmul(self.weight) + self.bias + return hidden_states + + +class VisualOnlyMLMHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualOnlyMLMHead, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class VisualOnlyNSPHead(nn.Module): + def __init__(self, config): + super(VisualOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class VisualPreTrainingHeads(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualPreTrainingHeads, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class VisualModel(PreTrainedModel): + """Visual model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a VisualConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controlled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for Visual-base, 24 for Visual-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated to the first character of the + input (`CLS`) to train on the Next-Sentence task (see the BERT paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + + config = modeling.VisualConfig(vocab_size_or_config_json_file=4096, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.VisualModel(config=config) + all_encoder_layers, pooled_output = model(video, video_mask) + ``` + """ + def __init__(self, config): + super(VisualModel, self).__init__(config) + self.embeddings = VisualEmbeddings(config) + self.encoder = VisualEncoder(config) + self.pooler = VisualPooler(config) + self.apply(self.init_weights) + + def forward(self, video, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones(video.size(0), video.size(1)) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is simpler than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely.
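The additive-mask trick those comments describe is easy to see with concrete numbers: attended positions become 0.0 and masked ones -10000.0, so the softmax drives the latter to effectively zero weight. A tiny standalone demo (the actual conversion follows below):

```python
# Sketch: 1 -> 0.0 (keep), 0 -> -10000.0 (drop) before the softmax.
import torch

attention_mask = torch.tensor([[1, 1, 1], [1, 1, 0]])
extended = attention_mask[:, None, None, :].float()   # (b, 1, 1, seq)
extended = (1.0 - extended) * -10000.0
print(extended.squeeze())
# tensor([[    -0.,     -0.,     -0.],
#         [    -0.,     -0., -10000.]])
```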
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(video) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/optimization.py b/anet_clip/backup/pdvc/modules/optimization.py new file mode 100644 index 0000000000000000000000000000000000000000..264c57c7d8f213004b4ee82a8861e0ae6103c906 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/optimization.py @@ -0,0 +1,168 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch optimization for BERT model.""" + +import math +import torch +from torch.optim import Optimizer +from torch.optim.optimizer import required +from torch.nn.utils import clip_grad_norm_ +import logging + +logger = logging.getLogger(__name__) + +def warmup_cosine(x, warmup=0.002): + if x < warmup: + return x/warmup + return 0.5 * (1.0 + math.cos(math.pi * x)) # math.cos, not torch.cos: x is a plain Python float here + +def warmup_constant(x, warmup=0.002): + """ Linearly increases learning rate over `warmup`*`t_total` (as provided to BertAdam) training steps. + Learning rate is 1. afterwards. """ + if x < warmup: + return x/warmup + return 1.0 + +def warmup_linear(x, warmup=0.002): + """ Specifies a triangular learning rate schedule where peak is reached at `warmup`*`t_total`-th (as provided to BertAdam) training step. + After `t_total`-th training step, learning rate is zero. """ + if x < warmup: + return x/warmup + return max((x-1.)/(warmup-1.), 0) + +SCHEDULES = { + 'warmup_cosine': warmup_cosine, + 'warmup_constant': warmup_constant, + 'warmup_linear': warmup_linear, +} + + +class BertAdam(Optimizer): + """Implements BERT version of Adam algorithm with weight decay fix. + Params: + lr: learning rate + warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1 + t_total: total number of training steps for the learning + rate schedule, -1 means constant learning rate. Default: -1 + schedule: schedule to use for the warmup (see above). Default: 'warmup_linear' + b1: Adam's b1. Default: 0.9 + b2: Adam's b2. Default: 0.999 + e: Adam's epsilon. Default: 1e-6 + weight_decay: Weight decay. Default: 0.01 + max_grad_norm: Maximum norm for the gradients (-1 means no clipping).
Default: 1.0 + """ + def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear', + b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, + max_grad_norm=1.0): + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr)) + if schedule not in SCHEDULES: + raise ValueError("Invalid schedule parameter: {}".format(schedule)) + if not 0.0 <= warmup < 1.0 and not warmup == -1: + raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup)) + if not 0.0 <= b1 < 1.0: + raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1)) + if not 0.0 <= b2 < 1.0: + raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2)) + if not e >= 0.0: + raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e)) + defaults = dict(lr=lr, schedule=schedule, warmup=warmup, t_total=t_total, + b1=b1, b2=b2, e=e, weight_decay=weight_decay, + max_grad_norm=max_grad_norm) + super(BertAdam, self).__init__(params, defaults) + + def get_lr(self): + lr = [] + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + state = self.state[p] + if len(state) == 0: + return [0] + if group['t_total'] != -1: + schedule_fct = SCHEDULES[group['schedule']] + lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup']) + else: + lr_scheduled = group['lr'] + lr.append(lr_scheduled) + return lr + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['next_m'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['next_v'] = torch.zeros_like(p.data) + + next_m, next_v = state['next_m'], state['next_v'] + beta1, beta2 = group['b1'], group['b2'] + + # Add grad clipping + if group['max_grad_norm'] > 0: + clip_grad_norm_(p, group['max_grad_norm']) + + # Decay the first and second moment running average coefficient + # In-place operations to update the averages at the same time + # next_m.mul_(beta1).add_(1 - beta1, grad) --> pytorch 1.7 + next_m.mul_(beta1).add_(grad, alpha=1 - beta1) + # next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad) --> pytorch 1.7 + next_v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + update = next_m / (next_v.sqrt() + group['e']) + + # Just adding the square of the weights to the loss function is *not* + # the correct way of using L2 regularization/weight decay with Adam, + # since that will interact with the m and v parameters in strange ways. + # + # Instead we want to decay the weights in a manner that doesn't interact + # with the m/v parameters. This is equivalent to adding the square + # of the weights to the loss with plain (non-momentum) SGD. 
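The comment above is the AdamW-style argument: decay the weights inside the update rather than through the loss, so the decay does not mix into the first/second-moment statistics. Condensed to numbers (a sketch, not the optimizer's exact code path, which continues below):

```python
# Sketch: decoupled weight decay joins the Adam direction, then one scaled step.
import torch

p = torch.ones(3)                        # parameter
adam_dir = torch.full((3,), 0.1)         # stands in for next_m / (next_v.sqrt() + eps)
weight_decay, lr_scheduled = 0.01, 1e-3

update = adam_dir + weight_decay * p     # decay enters the update directly
p = p - lr_scheduled * update            # mirrors p.data.add_(-update_with_lr)
print(p)                                 # tensor([0.9999, 0.9999, 0.9999])
```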
+ if group['weight_decay'] > 0.0: + update += group['weight_decay'] * p.data + + if group['t_total'] != -1: + schedule_fct = SCHEDULES[group['schedule']] + progress = state['step']/group['t_total'] + lr_scheduled = group['lr'] * schedule_fct(progress, group['warmup']) + else: + lr_scheduled = group['lr'] + + update_with_lr = lr_scheduled * update + p.data.add_(-update_with_lr) + + state['step'] += 1 + + return loss \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/tokenization.py b/anet_clip/backup/pdvc/modules/tokenization.py new file mode 100644 index 0000000000000000000000000000000000000000..183c81000f82aae59295f8d8572b6bcf67891790 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/tokenization.py @@ -0,0 +1,408 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import unicodedata +import os +import sys +import logging + +from .file_utils import cached_path + +logger = logging.getLogger(__name__) +PRETRAINED_VOCAB_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", +} +PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = { + 'bert-base-uncased': 512, + 'bert-large-uncased': 512, + 'bert-base-cased': 512, + 'bert-large-cased': 512, + 'bert-base-multilingual-uncased': 512, + 'bert-base-multilingual-cased': 512, + 'bert-base-chinese': 512, +} +VOCAB_NAME = 'vocab.txt' + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting + WordPiece.""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")): + if not 
os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + if token not in self.vocab: + ids.append(self.vocab["[UNK]"]) + logger.error("Cannot find token '{}' in vocab. Using [UNK] instead".format(token)) + else: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + raise ValueError( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids into tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + @classmethod + def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedBertModel from a pre-trained model file. + Download and cache the pre-trained model file if needed. + """ + vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(vocab_file) is False: + if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP: + vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name] + else: + vocab_file = pretrained_model_name + if os.path.isdir(vocab_file): + vocab_file = os.path.join(vocab_file, VOCAB_NAME) + # redirect to the cache, if necessary + print(vocab_file) + try: + resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir) + except FileNotFoundError: + logger.error( + "Model name '{}' was not found. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + vocab_file)) + return None + if resolved_vocab_file == vocab_file: + logger.info("loading vocabulary file {}".format(vocab_file)) + else: + logger.info("loading vocabulary file {} from cache at {}".format( + vocab_file, resolved_vocab_file)) + if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP: + # if we're using a pretrained model, ensure the tokenizer won't index sequences longer + # than the number of positional embeddings + max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name] + kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len) + kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]") + + # Instantiate tokenizer. + tokenizer = cls(resolved_vocab_file, *inputs, **kwargs) + + return tokenizer + + def add_tokens(self, new_tokens, model): + """ + Add a list of new tokens to the tokenizer class.
If the new tokens are not in the + vocabulary, they are added to it with indices starting from length of the current vocabulary. + Args: + new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assign the index of the ``unk_token`` to them). + Returns: + Number of tokens added to the vocabulary. + Examples:: + # Let's see how to increase the vocabulary of Bert model and tokenizer + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) + print('We have added', num_added_toks, 'tokens') + model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. + """ + + to_add_tokens = [] + for token in new_tokens: + assert isinstance(token, str) + to_add_tokens.append(token) + # logger.info("Adding %s to the vocabulary", token) + + vocab = collections.OrderedDict() + for token in self.vocab.keys(): + vocab[token] = self.vocab[token] + for token in to_add_tokens: + vocab[token] = len(vocab) + self.vocab = self.wordpiece_tokenizer.vocab = vocab + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + + model.resize_token_embeddings(new_num_tokens=len(vocab)) + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
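`add_tokens` above grows the vocabulary by appending, so existing token ids never move and only the embedding matrix needs resizing. The core of that bookkeeping, reduced to a few lines before the `tokenize()` body resumes below (`model.resize_token_embeddings` is omitted here):

```python
# Sketch: append-only vocabulary growth, as in add_tokens above.
import collections

vocab = collections.OrderedDict([("[UNK]", 0), ("hello", 1)])
for token in ["new_tok1", "my_new-tok2"]:
    if token not in vocab:
        vocab[token] = len(vocab)      # next free id
print(vocab)
# OrderedDict([('[UNK]', 0), ('hello', 1), ('new_tok1', 2), ('my_new-tok2', 3)])
```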
+ text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. 
+ """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/anet_clip/backup/pdvc/modules/until_config.py b/anet_clip/backup/pdvc/modules/until_config.py new file mode 100644 index 0000000000000000000000000000000000000000..596c157aa23c82eb33c1fb2e07d9b006a52990e9 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/until_config.py @@ -0,0 +1,126 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import logging +import tarfile +import tempfile +import shutil +import torch +from .file_utils import cached_path + +logger = logging.getLogger(__name__) + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weight doesn't exsits. 
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" \ No newline at end of file diff --git a/anet_clip/backup/pdvc/modules/until_module.py b/anet_clip/backup/pdvc/modules/until_module.py new file mode 100644 index 0000000000000000000000000000000000000000..d550638157f8aeb2116a9cce022b2c563fd3491b --- /dev/null +++ b/anet_clip/backup/pdvc/modules/until_module.py @@ -0,0 +1,251 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import logging +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F +import math +from pdvc.modules.until_config import PretrainedConfig + +logger = logging.getLogger(__name__) + +def gelu(x): + """Implementation of the gelu activation function. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. 
+ """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + if not isinstance(config, PretrainedConfig): + raise ValueError( + "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + "To create a model from a Google pretrained model use " + "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + self.__class__.__name__, self.__class__.__name__ + )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). 
+ """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +################################## +###### LOSS FUNCTION ############# +################################## +class CrossEn(nn.Module): + def __init__(self,): + super(CrossEn, self).__init__() + + def forward(self, sim_matrix): + logpt = F.log_softmax(sim_matrix, dim=-1) + logpt = torch.diag(logpt) + nce_loss = -logpt + sim_loss = nce_loss.mean() + return sim_loss + +class MILNCELoss(nn.Module): + def __init__(self, batch_size=1, n_pair=1,): + super(MILNCELoss, self).__init__() + self.batch_size = batch_size + self.n_pair = n_pair + torch_v = float(".".join(torch.__version__.split(".")[:2])) + self.bool_dtype = torch.bool if torch_v >= 1.3 else torch.uint8 + + def forward(self, sim_matrix): + mm_mask = np.eye(self.batch_size) + mm_mask = np.kron(mm_mask, np.ones((self.n_pair, self.n_pair))) + mm_mask = torch.tensor(mm_mask).float().to(sim_matrix.device) + + from_text_matrix = sim_matrix + mm_mask * -1e12 + from_video_matrix = sim_matrix.transpose(1, 0) + + new_sim_matrix = torch.cat([from_video_matrix, from_text_matrix], dim=-1) + logpt = F.log_softmax(new_sim_matrix, dim=-1) + + mm_mask_logpt = torch.cat([mm_mask, torch.zeros_like(mm_mask)], dim=-1) + masked_logpt = logpt + (torch.ones_like(mm_mask_logpt) - mm_mask_logpt) * -1e12 + + new_logpt = -torch.logsumexp(masked_logpt, dim=-1) + + logpt_choice = torch.zeros_like(new_logpt) + mark_ind = torch.arange(self.batch_size).to(sim_matrix.device) * self.n_pair + (self.n_pair//2) + logpt_choice[mark_ind] = 1 + sim_loss = new_logpt.masked_select(logpt_choice.to(dtype=self.bool_dtype)).mean() + return sim_loss + +class MaxMarginRankingLoss(nn.Module): + def __init__(self, + margin=1.0, + negative_weighting=False, + batch_size=1, + n_pair=1, + hard_negative_rate=0.5, + ): + super(MaxMarginRankingLoss, self).__init__() + self.margin = margin + self.n_pair = n_pair + self.batch_size = batch_size + easy_negative_rate = 1 - hard_negative_rate + self.easy_negative_rate = easy_negative_rate + self.negative_weighting = negative_weighting + if n_pair > 1 and batch_size > 1: + alpha = easy_negative_rate / ((batch_size - 1) * (1 - easy_negative_rate)) + mm_mask = (1 - alpha) * np.eye(self.batch_size) + alpha + mm_mask = np.kron(mm_mask, np.ones((n_pair, n_pair))) + mm_mask = torch.tensor(mm_mask) * (batch_size * (1 - easy_negative_rate)) + self.mm_mask = mm_mask.float() + + def forward(self, x): + d = torch.diag(x) + max_margin = F.relu(self.margin + x - d.view(-1, 1)) + \ + F.relu(self.margin + x - d.view(1, -1)) + if self.negative_weighting and self.n_pair > 1 and self.batch_size > 1: + max_margin = max_margin * self.mm_mask.to(max_margin.device) + return max_margin.mean() \ No newline at end of file diff --git 
a/anet_clip/backup/pdvc/modules/visual-base/visual_config.json b/anet_clip/backup/pdvc/modules/visual-base/visual_config.json new file mode 100644 index 0000000000000000000000000000000000000000..324fcb6e7ba63166767adf9afa82324412247a48 --- /dev/null +++ b/anet_clip/backup/pdvc/modules/visual-base/visual_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 1, + "vocab_size": 1024 +} diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..5f86c9097b3b6f4b7f50b9d70f7cd58b2f386871 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO @@ -0,0 +1,6 @@ +Metadata-Version: 2.1 +Name: MultiScaleDeformableAttention +Version: 1.0 +Summary: PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention +Home-page: https://github.com/fundamentalvision/Deformable-DETR +Author: Weijie Su diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc251e74aff93cae99a730109d3f696ef326b210 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt @@ -0,0 +1,13 @@ +setup.py +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +MultiScaleDeformableAttention.egg-info/PKG-INFO +MultiScaleDeformableAttention.egg-info/SOURCES.txt +MultiScaleDeformableAttention.egg-info/dependency_links.txt +MultiScaleDeformableAttention.egg-info/top_level.txt +functions/__init__.py +functions/ms_deform_attn_func.py +modules/__init__.py +modules/ms_deform_attn.py +modules/ms_deform_attn_for_caption.py \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..25d8f7790d14d04a74c6acec779aedb3688ef630 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt @@ -0,0 +1,3 @@ +MultiScaleDeformableAttention +functions +modules diff --git a/anet_clip/backup/pdvc/ops/__init__.py b/anet_clip/backup/pdvc/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ed3d8ddca46efead59543bfd2f1961790abdc96 Binary 
files /dev/null and b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c722836c6716e99f5a33542ebc2461e4540b9c0 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..affe1b85a7c92a8c1ecfca0d0b2c329ce77bf383 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5720c8c8f59f4168baf51ec63ba9c5f5e90d5abb998c0fbdd6170547d23a13 +size 7942000 diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # The compiled CUDA extension is optional; ms_deform_attn_core_pytorch below
+    # provides a pure-PyTorch fallback.
+    pass
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # Pure-PyTorch reference implementation, intended for debugging and testing;
+    # prefer the compiled CUDA version for speed.
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum_l H_l*W_l, M_: number of heads, D_: feature dim per head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of levels, P_: sampled key points per level
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert locations from range [0, 1] to [-1, 1] for grid_sample
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
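+        # Editorial shape sketch (assumed example values, not upstream code): with
+        # N_=2, M_=8, D_=32, Lq_=10, P_=4 and a level of shape H_=1, W_=100,
+        # value_l_ is (16, 32, 1, 100) and sampling_grid_l_ is (16, 10, 4, 2);
+        # F.grid_sample below then bilinearly reads one (x, y) location per
+        # query/point and returns sampling_value_l_ of shape (16, 32, 10, 4).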
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='border', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+
+    if return_value:
+        return torch.stack(sampling_value_list, dim=-2)
+    # (N_*M_, D_, Lq_, L_*P_) * (N_*M_, 1, Lq_, L_*P_) -> (N_*M_, D_, Lq_)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from .ms_deform_attn import MSDeformAttn
+from .ms_deform_attn_for_caption import MSDeformAttnCap
\ No newline at end of file
diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py
new file mode 100644
index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py
@@ -0,0 +1,126 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
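+
+    # Explanatory note (an editorial aside, not upstream code): with the weight
+    # zeroed, the initial sampling offset for head h and point p is just the bias,
+    # i.e. the first component of the L_inf-normalized direction
+    # (cos(2*pi*h/n_heads), sin(2*pi*h/n_heads)) scaled by (p + 1). For the
+    # default n_heads=8 those per-head unit offsets fall in {-1, 0, +1}, so heads
+    # start by probing both sides of (and exactly at) the reference point.
+    # attention_weights starts at zero, i.e. a uniform softmax over all
+    # n_levels * n_points sampled values.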
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # the CUDA kernel and grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
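+
+    # Explanatory note (an editorial aside, not upstream code): same directional
+    # bias scheme as MSDeformAttn, but grid_init is mean-centered over the point
+    # axis, so each head's initial sampling points straddle the reference location
+    # rather than extending to one side only. Unlike MSDeformAttn, the offset and
+    # weight projections here take a 2*d_model input (presumably a query
+    # concatenated with caption-side features), and forward() below always runs
+    # the pure-PyTorch path with return_value=True, returning the raw per-point
+    # sampled values instead of the attention-weighted aggregate.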
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N*n_heads, d_model//n_heads, Length_{query}, n_levels, n_points), per-point sampled values (return_value=True below)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps
new file mode 100644
index 0000000000000000000000000000000000000000..2bef29d420f02b4282644cba394698912212dab8
Binary files /dev/null and b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps differ
diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
new file mode 100644
index 0000000000000000000000000000000000000000..fd78ae63cd064bb569f9279931f2e0668833f50d
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
@@ -0,0 +1,4 @@
+# ninja log v5
+0 2930 1685020146224081877 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o 8f7db54445222f0
+0 10580 1685020153869972218 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o 91f10249ca524b9b
+0 13795
1685020157081510628 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o 3e48c35d2c631cee diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..9d156fb45877ed14f310b8ae1f889c048fe0fa2b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja @@ -0,0 +1,30 @@ +ninja_required_version = 1.3 +cxx = c++ +nvcc = /usr/local/cuda/bin/nvcc + +cflags = -pthread -B /home/liuhuabin/miniconda3/envs/PDVC/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 +cuda_cflags = -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o: compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o: cuda_compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o: compile 
/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp + + + + + diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o new file mode 100644 index 0000000000000000000000000000000000000000..d30f1ff54acc23e3e0f5ea22b3a8828fdd2c44b7 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59afa2abc476414b1faa6816920a93293fc9e71aa96d790c80760a879f5d0682 +size 1437672 diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..d9274a1b895a7c123eab8231e2e24c2ea6629581 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973f1d16162f782172da95253065226cd068f45430bbc1a8920929ffda09947d +size 920176 diff --git a/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o new file mode 100644 index 0000000000000000000000000000000000000000..e771be34bcbacfa86a2e41f1728b9d0b2fef3a85 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad8100cd431dec4d7ef8dc5d144c90402c71b4b41a772e5f120c38b8fe9aa0e +size 10423896 diff --git a/anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg b/anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg new file mode 100644 index 0000000000000000000000000000000000000000..dc5bbc86e1f4304b490711416d30dbeecec3a2b8 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ad69121c719dc533912a5233ee2ba4d895fd745283dc122601f20b0da2a519 +size 2223428 diff --git a/anet_clip/backup/pdvc/ops/functions/__init__.py b/anet_clip/backup/pdvc/ops/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00b83c1e1d8810a77347e3d76609cdf347898186 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09ce00b445b4c8d76b027f013de6cb094dae82dc Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..222160988ac28f5eba55fe2acff1a6b176b3429b Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fc0981ca1144f3eb8a7166b570fb797f8004a16 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/functions/ms_deform_attn_func.py b/anet_clip/backup/pdvc/ops/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # The compiled CUDA extension is optional; ms_deform_attn_core_pytorch below
+    # provides a pure-PyTorch fallback.
+    pass
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # Pure-PyTorch reference implementation, intended for debugging and testing;
+    # prefer the compiled CUDA version for speed.
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum_l H_l*W_l, M_: number of heads, D_: feature dim per head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of levels, P_: sampled key points per level
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert locations from range [0, 1] to [-1, 1] for grid_sample
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2); the last dim is (x, y), where x indexes the width axis and y the height axis.
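+        # Editorial shape sketch (assumed example values, not upstream code): with
+        # N_=2, M_=8, D_=32, Lq_=10, P_=4 and a level of shape H_=1, W_=100,
+        # value_l_ is (16, 32, 1, 100) and sampling_grid_l_ is (16, 10, 4, 2);
+        # F.grid_sample below then bilinearly reads one (x, y) location per
+        # query/point and returns sampling_value_l_ of shape (16, 32, 10, 4).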
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='border', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+
+    if return_value:
+        return torch.stack(sampling_value_list, dim=-2)
+    # (N_*M_, D_, Lq_, L_*P_) * (N_*M_, 1, Lq_, L_*P_) -> (N_*M_, D_, Lq_)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/anet_clip/backup/pdvc/ops/make.sh b/anet_clip/backup/pdvc/ops/make.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a7e4320108ecd2f02d1824505849850b0c69d319
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/make.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+python setup.py build install
diff --git a/anet_clip/backup/pdvc/ops/modules/__init__.py b/anet_clip/backup/pdvc/ops/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn import MSDeformAttn +from .ms_deform_attn_for_caption import MSDeformAttnCap \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd29db0d448db6cc3ebfcb499cb6105d2f745555 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc85ead761d81b2d819429824ee2393e9f50a6ae Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1de99e2b9ab1efc42b399837d8cfd7a09a3e2ef1 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12c1ccbe61ed8ca360ce969e012e60a89d05cece Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..671fa7d00552b0d0913bf502750b061574f7b3f2 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a23f676c0714c277a628441a7459d2724f62b61 Binary files /dev/null and b/anet_clip/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc differ diff --git a/anet_clip/backup/pdvc/ops/modules/ms_deform_attn.py b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
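+
+    # Explanatory note (an editorial aside, not upstream code): with the weight
+    # zeroed, the initial sampling offset for head h and point p is just the bias,
+    # i.e. the first component of the L_inf-normalized direction
+    # (cos(2*pi*h/n_heads), sin(2*pi*h/n_heads)) scaled by (p + 1). For the
+    # default n_heads=8 those per-head unit offsets fall in {-1, 0, +1}, so heads
+    # start by probing both sides of (and exactly at) the reference point.
+    # attention_weights starts at zero, i.e. a uniform softmax over all
+    # n_levels * n_points sampled values.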
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # the CUDA kernel and grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/anet_clip/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
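+
+    # Explanatory note (an editorial aside, not upstream code): same directional
+    # bias scheme as MSDeformAttn, but grid_init is mean-centered over the point
+    # axis, so each head's initial sampling points straddle the reference location
+    # rather than extending to one side only. Unlike MSDeformAttn, the offset and
+    # weight projections here take a 2*d_model input (presumably a query
+    # concatenated with caption-side features), and forward() below always runs
+    # the pure-PyTorch path with return_value=True, returning the raw per-point
+    # sampled values instead of the attention-weighted aggregate.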
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query                       (N, Length_{query}, C)
+        :param reference_points            (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                                           or (N, Length_{query}, n_levels, 2), (center, length) pairs forming reference segments
+        :param input_flatten               (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes        (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index     (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask          (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output                     (N*n_heads, d_model//n_heads, Length_{query}, n_levels, n_points), per-point sampled values (return_value=True below)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: (N, Len_q, n_heads, n_levels, n_points), normalized to [0, 1]
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to the 2-D (H=1, W=T) layout expected by
+        # grid_sample: y is fixed at 0.5, i.e. the row center.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/anet_clip/backup/pdvc/ops/setup.py b/anet_clip/backup/pdvc/ops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0131bc21cf1b45b90fcf174e2c53e4c08e9c641
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/setup.py
@@ -0,0 +1,71 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+import os
+import glob
+
+import torch
+
+from torch.utils.cpp_extension import CUDA_HOME
+from torch.utils.cpp_extension import CppExtension
+from torch.utils.cpp_extension import CUDAExtension
+
+from setuptools import find_packages
+from setuptools import setup
+
+requirements = ["torch", "torchvision"]
+
+def get_extensions():
+    this_dir = os.path.dirname(os.path.abspath(__file__))
+    extensions_dir = os.path.join(this_dir, "src")
+
+    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
+    source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
+    source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
+
+    sources = main_file + source_cpu
+    extension = CppExtension
+    extra_compile_args = {"cxx": []}
+    define_macros = []
+
+    if torch.cuda.is_available() and CUDA_HOME is not None:
+        extension = CUDAExtension
+        sources += source_cuda
+        define_macros += [("WITH_CUDA", None)]
+        extra_compile_args["nvcc"] = [
+            "-DCUDA_HAS_FP16=1",
+            "-D__CUDA_NO_HALF_OPERATORS__",
+            "-D__CUDA_NO_HALF_CONVERSIONS__",
+            "-D__CUDA_NO_HALF2_OPERATORS__",
+        ]
+    else:
+        raise NotImplementedError('CUDA is not available')
+
+    sources = [os.path.join(extensions_dir, s) for s in sources]
+    include_dirs = [extensions_dir]
+    ext_modules = [
+        extension(
+            "MultiScaleDeformableAttention",
+            sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        )
+    ]
+    return ext_modules
+
+setup(
+    name="MultiScaleDeformableAttention",
+    version="1.0",
+    author="Weijie Su",
+    url="https://github.com/fundamentalvision/Deformable-DETR",
+    description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention",
+    packages=find_packages(exclude=("configs", "tests",)),
+    ext_modules=get_extensions(),
+    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
+)
diff --git a/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e1bf854de1f3860d20b6fef5c1a17817c268e70a
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
@@ -0,0 +1,41 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on the CPU");
+}
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on the CPU");
+}
+
diff --git a/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..81b7b58a3d9502bbb684dc84687a526dedf94cae
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
@@ -0,0 +1,33 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
+
diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d6d583647cce987196d5ad1968a8a365a379e774
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
@@ -0,0 +1,153 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+#include "cuda/ms_deform_im2col_cuda.cuh"
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
+
+    const int batch_n = im2col_step_;
+    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto columns = output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
+            ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                columns.data<scalar_t>());
+
+        }));
+    }
+
+    output = output.view({batch, num_query, num_heads*channels});
+
+    return output;
+}
+
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
"spatial_shapes tensor has to be contiguous"); + AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); + AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); + AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); + AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous"); + + AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); + AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); + AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); + AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); + AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); + AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor"); + + const int batch = value.size(0); + const int spatial_size = value.size(1); + const int num_heads = value.size(2); + const int channels = value.size(3); + + const int num_levels = spatial_shapes.size(0); + + const int num_query = sampling_loc.size(1); + const int num_point = sampling_loc.size(4); + + const int im2col_step_ = std::min(batch, im2col_step); + + AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); + + auto grad_value = at::zeros_like(value); + auto grad_sampling_loc = at::zeros_like(sampling_loc); + auto grad_attn_weight = at::zeros_like(attn_weight); + + const int batch_n = im2col_step_; + auto per_value_size = spatial_size * num_heads * channels; + auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; + auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; + auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); + + for (int n = 0; n < batch/im2col_step_; ++n) + { + auto grad_output_g = grad_output_n.select(0, n); + AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] { + ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(), + grad_output_g.data(), + value.data() + n * im2col_step_ * per_value_size, + spatial_shapes.data(), + level_start_index.data(), + sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, + attn_weight.data() + n * im2col_step_ * per_attn_weight_size, + batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, + grad_value.data() + n * im2col_step_ * per_value_size, + grad_sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, + grad_attn_weight.data() + n * im2col_step_ * per_attn_weight_size); + + })); + } + + return { + grad_value, grad_sampling_loc, grad_attn_weight + }; +} \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h new file mode 100644 index 0000000000000000000000000000000000000000..c7ae53f99c820ce6193b608ad344550348a0b42c --- /dev/null +++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h @@ -0,0 +1,30 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. 
diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7ae53f99c820ce6193b608ad344550348a0b42c
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
@@ -0,0 +1,30 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
diff --git a/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5635be7822e7cbfb8b5524185f213a9368a91dce
--- /dev/null
+++ b/anet_clip/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
@@ -0,0 +1,1328 @@
+/*!
+**************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************
+* Modified from DCN (https://github.com/msracver/Deformable-ConvNets)
+* Copyright (c) 2018 Microsoft
+**************************************************************************
+*/
+
+#include <cstdio>
+#include <algorithm>
+#include <cstring>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+#include <THC/THCAtomics.cuh>
+
+// Grid-stride loop: spread the n work items evenly across all launched blocks.
+#define CUDA_KERNEL_LOOP(i, n)                          \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \
+      i < (n);                                          \
+      i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+inline int GET_BLOCKS(const int N, const int num_threads)
+{
+  return (N + num_threads - 1) / num_threads;
+}
+
+
+template <typename scalar_t>
+__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data,
+                                                   const int &height, const int &width, const int &nheads, const int &channels,
+                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset +
w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + } + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + + +template +__device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data, + const int &height, const int &width, const int &nheads, const int &channels, + const scalar_t &h, const scalar_t &w, const int &m, const int &c, + const scalar_t &top_grad, + const scalar_t &attn_weight, + scalar_t* &grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int h_low = floor(h); + const int w_low = floor(w); + const int h_high = h_low + 1; + const int w_high = w_low + 1; + + const scalar_t lh = h - h_low; + const scalar_t lw = w - w_low; + const scalar_t hh = 1 - lh, hw = 1 - lw; + + const int w_stride = nheads * channels; + const int h_stride = width * w_stride; + const int h_low_ptr_offset = h_low * h_stride; + const int h_high_ptr_offset = h_low_ptr_offset + h_stride; + const int w_low_ptr_offset = w_low * w_stride; + const int w_high_ptr_offset = w_low_ptr_offset + w_stride; + const int base_ptr = m * channels + c; + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + const scalar_t top_grad_value = top_grad * attn_weight; + scalar_t grad_h_weight = 0, grad_w_weight = 0; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + { + const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr; + v1 = bottom_data[ptr1]; + grad_h_weight -= hw * v1; + grad_w_weight -= hh * v1; + atomicAdd(grad_value+ptr1, w1*top_grad_value); + } + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + { + const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; + v2 = bottom_data[ptr2]; + grad_h_weight -= lw * v2; + grad_w_weight += hh * v2; + atomicAdd(grad_value+ptr2, w2*top_grad_value); + } + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + { + const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; + v3 = bottom_data[ptr3]; + grad_h_weight += hw * v3; + grad_w_weight -= lh * v3; + atomicAdd(grad_value+ptr3, w3*top_grad_value); + } + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + { + const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + grad_h_weight += lw * v4; + grad_w_weight += lh * v4; + atomicAdd(grad_value+ptr4, w4*top_grad_value); + } + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + *grad_attn_weight = top_grad * val; + *grad_sampling_loc = width * grad_w_weight * top_grad_value; + *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value; +} + + +template +__device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data, + const int &height, const int &width, const int &nheads, const int &channels, + const scalar_t &h, const scalar_t &w, const int &m, const int &c, + const scalar_t &top_grad, + const scalar_t &attn_weight, + scalar_t* &grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int h_low = floor(h); + const int w_low = floor(w); + const int h_high = h_low + 1; + const int w_high = w_low + 1; + + const scalar_t lh = h - h_low; + const scalar_t lw = w - w_low; + const scalar_t hh = 1 - lh, hw = 1 - lw; + + const int w_stride = nheads * channels; + const int h_stride = width * w_stride; + const int h_low_ptr_offset = h_low * h_stride; + const int h_high_ptr_offset = h_low_ptr_offset + h_stride; + const int w_low_ptr_offset = w_low * w_stride; + 
const int w_high_ptr_offset = w_low_ptr_offset + w_stride; + const int base_ptr = m * channels + c; + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + const scalar_t top_grad_value = top_grad * attn_weight; + scalar_t grad_h_weight = 0, grad_w_weight = 0; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + { + const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr; + v1 = bottom_data[ptr1]; + grad_h_weight -= hw * v1; + grad_w_weight -= hh * v1; + atomicAdd(grad_value+ptr1, w1*top_grad_value); + } + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + { + const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; + v2 = bottom_data[ptr2]; + grad_h_weight -= lw * v2; + grad_w_weight += hh * v2; + atomicAdd(grad_value+ptr2, w2*top_grad_value); + } + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + { + const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; + v3 = bottom_data[ptr3]; + grad_h_weight += hw * v3; + grad_w_weight -= lh * v3; + atomicAdd(grad_value+ptr3, w3*top_grad_value); + } + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + { + const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + grad_h_weight += lw * v4; + grad_w_weight += lh * v4; + atomicAdd(grad_value+ptr4, w4*top_grad_value); + } + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + atomicAdd(grad_attn_weight, top_grad * val); + atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value); + atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value); +} + + +template +__global__ void ms_deformable_im2col_gpu_kernel(const int n, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + scalar_t *data_col_ptr = data_col + index; + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + scalar_t col = 0; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride); + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, 
channels, h_im, w_im, m_col, c_col) * weight; + } + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + } + } + *data_col_ptr = col; + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + if (tid == 0) + { + scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0]; + int sid=2; + for (unsigned int tid = 1; tid < blockSize; ++tid) + { + _grad_w += cache_grad_sampling_loc[sid]; + _grad_h += cache_grad_sampling_loc[sid + 1]; + _grad_a += cache_grad_attn_weight[tid]; + sid += 2; + } + + + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + 
grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockSize/2; s>0; s>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + } + __syncthreads(); + } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += 
grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + if (tid == 0) + { + scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0]; + int sid=2; + for (unsigned int tid = 1; tid < blockDim.x; ++tid) + { + _grad_w += cache_grad_sampling_loc[sid]; + _grad_h += cache_grad_sampling_loc[sid + 1]; + _grad_a += cache_grad_attn_weight[tid]; + sid += 2; + } + + + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + 
} + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); 
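+        // Added note: each halving step folds the upper half of the shared-memory caches
+        // onto the lower half. In this kernel blockDim.x equals `channels`, which need not
+        // be a power of two, so the extra `tid + (s << 1) < spre` branch also folds in the
+        // odd leftover element whenever the previous active width `spre` is odd, keeping
+        // the sum complete. The __syncthreads() above publishes each round's partial sums
+        // before the next halving begins.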
+ } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + 
cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); + } + + if (tid == 0) + { + atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]); + atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]); + atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]); + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_gm(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear_gm( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + grad_sampling_loc, grad_attn_weight); + } + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +void ms_deformable_im2col_cuda(cudaStream_t stream, + const scalar_t* data_value, + const int64_t* data_spatial_shapes, + const int64_t* data_level_start_index, + const scalar_t* data_sampling_loc, + const 
scalar_t* data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t* data_col) +{ + const int num_kernels = batch_size * num_query * num_heads * channels; + const int num_actual_kernels = batch_size * num_query * num_heads * channels; + const int num_threads = CUDA_NUM_THREADS; + ms_deformable_im2col_gpu_kernel + <<>>( + num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, + batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); + } + +} + +template +void ms_deformable_col2im_cuda(cudaStream_t stream, + const scalar_t* grad_col, + const scalar_t* data_value, + const int64_t * data_spatial_shapes, + const int64_t * data_level_start_index, + const scalar_t * data_sampling_loc, + const scalar_t * data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t* grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int num_threads = (channels > CUDA_NUM_THREADS)?CUDA_NUM_THREADS:channels; + const int num_kernels = batch_size * num_query * num_heads * channels; + const int num_actual_kernels = batch_size * num_query * num_heads * channels; + if (channels > 1024) + { + if ((channels & 1023) == 0) + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + else + { + ms_deformable_col2im_gpu_kernel_gm + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + } + else{ + switch(channels) + { + case 1: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 2: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 4: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 8: + 
ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 16: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 32: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 64: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 128: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 256: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 512: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 1024: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + default: + if (channels < 64) + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + else + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + 
data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + } + } + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); + } + +} \ No newline at end of file diff --git a/anet_clip/backup/pdvc/ops/src/ms_deform_attn.h b/anet_clip/backup/pdvc/ops/src/ms_deform_attn.h new file mode 100644 index 0000000000000000000000000000000000000000..ac0ef2ec25f7d0ee51ca2d807b159ddf85652017 --- /dev/null +++ b/anet_clip/backup/pdvc/ops/src/ms_deform_attn.h @@ -0,0 +1,62 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +#pragma once + +#include "cpu/ms_deform_attn_cpu.h" + +#ifdef WITH_CUDA +#include "cuda/ms_deform_attn_cuda.h" +#endif + + +at::Tensor +ms_deform_attn_forward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const int im2col_step) +{ + if (value.type().is_cuda()) + { +#ifdef WITH_CUDA + return ms_deform_attn_cuda_forward( + value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +std::vector +ms_deform_attn_backward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const at::Tensor &grad_output, + const int im2col_step) +{ + if (value.type().is_cuda()) + { +#ifdef WITH_CUDA + return ms_deform_attn_cuda_backward( + value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + diff --git a/anet_clip/backup/pdvc/ops/src/vision.cpp b/anet_clip/backup/pdvc/ops/src/vision.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2201f63a51dca16d0b31148ed2c9e8e47ec15bdc --- /dev/null +++ b/anet_clip/backup/pdvc/ops/src/vision.cpp @@ -0,0 +1,16 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. 
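On the Python side, the two functions exported below are normally wrapped in a `torch.autograd.Function`; `test.py` imports exactly such a wrapper (`MSDeformAttnFunction` from `functions.ms_deform_attn_func`). A hedged sketch of that wrapper, assuming the compiled extension imports as `MultiScaleDeformableAttention` (an assumption here, not confirmed by this diff):

```python
import torch
from torch.autograd.function import once_differentiable

import MultiScaleDeformableAttention as MSDA  # assumed name of the compiled extension


class MSDeformAttnFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, value, spatial_shapes, level_start_index,
                sampling_loc, attn_weight, im2col_step):
        ctx.im2col_step = im2col_step
        output = MSDA.ms_deform_attn_forward(
            value, spatial_shapes, level_start_index,
            sampling_loc, attn_weight, ctx.im2col_step)
        ctx.save_for_backward(value, spatial_shapes, level_start_index,
                              sampling_loc, attn_weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        value, spatial_shapes, level_start_index, sampling_loc, attn_weight = ctx.saved_tensors
        grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward(
            value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
            grad_output.contiguous(), ctx.im2col_step)
        # no gradients flow to the integer index tensors or to im2col_step
        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
```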
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +#include "ms_deform_attn.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); + m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); +} diff --git a/anet_clip/backup/pdvc/ops/test.py b/anet_clip/backup/pdvc/ops/test.py new file mode 100644 index 0000000000000000000000000000000000000000..8dbf6d5547d131f01a8c5c28b76557bd27a9334b --- /dev/null +++ b/anet_clip/backup/pdvc/ops/test.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import time +import torch +import torch.nn as nn +from torch.autograd import gradcheck + +from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +N, M, D = 1, 2, 2 +Lq, L, P = 2, 2, 2 +shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() +level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) +S = sum([(H*W).item() for H, W in shapes]) + + +torch.manual_seed(3) + + +@torch.no_grad() +def check_forward_equal_with_pytorch_double(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +@torch.no_grad() +def check_forward_equal_with_pytorch_float(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, 
attention_weights, im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): + + value = torch.rand(N, S, M, channels).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + func = MSDeformAttnFunction.apply + + value.requires_grad = grad_value + sampling_locations.requires_grad = grad_sampling_loc + attention_weights.requires_grad = grad_attn_weight + + gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) + + print(f'* {gradok} check_gradient_numerical(D={channels})') + + +if __name__ == '__main__': + check_forward_equal_with_pytorch_double() + check_forward_equal_with_pytorch_float() + + for channels in [30, 32, 64, 71, 1025, 2048, 3096]: + check_gradient_numerical(channels, True, True, True) + + + diff --git a/anet_clip/backup/pdvc/pdvc.py b/anet_clip/backup/pdvc/pdvc.py new file mode 100644 index 0000000000000000000000000000000000000000..c342477fb906acda08cf40a040eb45b2b9e901b8 --- /dev/null +++ b/anet_clip/backup/pdvc/pdvc.py @@ -0,0 +1,1303 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR(https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
+# ------------------------------------------------------------------------
+
+import json
+import torch
+import torch.nn.functional as F
+from torch import nn
+import math
+import time
+
+from misc.detr_utils import box_ops
+from misc.detr_utils.misc import (inverse_sigmoid)
+
+from .matcher import build_matcher
+
+from .deformable_transformer import build_deforamble_transformer
+from pdvc.CaptioningHead import build_captioner
+import copy
+from .criterion import AlignCriterion, SetCriterion, ContrastiveCriterion
+# from .rl_tool import init_scorer
+from misc.utils import decide_two_stage
+from .base_encoder import build_base_encoder
+# from .video_segmentation import segment_video_into_steps, alignment_to_boundary, to_center_duration, align_frame_into_steps
+from .video_segmentation import *
+# from transformers import AutoModel, BertConfig
+# from transformers.models.bert.modeling_bert import BertEncoder
+import numpy as np
+from itertools import chain
+# from .UniVL import load_pretrained_UniVL
+
+
+def _get_clones(module, N):
+    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+class PDVC(nn.Module):
+    """ This is the PDVC module that performs dense video captioning """
+
+    def __init__(self, base_encoder, transformer, captioner, num_classes, num_queries, num_feature_levels,
+                 aux_loss=True, with_box_refine=False, opt=None, translator=None):
+        """ Initializes the model.
+        Parameters:
+            transformer: torch module of the transformer architecture. See transformer.py
+            captioner: captioning head that generates a sentence for each event query
+            num_classes: number of foreground classes
+            num_queries: number of event queries. This is the maximal number of events
+                         PDVC can detect in a single video. For ActivityNet Captions, we recommend 10-30 queries.
+            aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
+ with_box_refine: iterative bounding box refinement + opt: all configs + """ + super().__init__() + self.opt = opt + self.base_encoder = base_encoder + self.transformer = transformer + self.caption_head = captioner + num_pred_text = 0 + + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # self.text_encoder = text_encoder + # text_encoder_hidden_dim = self.text_encoder.config.hidden_size + # num_pred_text += 1 + + hidden_dim = transformer.d_model + text_hidden_dim = opt.text_hidden_dim + + if self.opt.use_anchor: + # self.tgt_embed = nn.Embedding(num_queries, hidden_dim) + self.anchor_embed = nn.Embedding(num_queries, 2) # num_queries, 2 (center, duration) + self.query_embed = self.transformer.prepare_init_anchor_and_query(self.anchor_embed, hidden_dim, \ + random_anchor_init=True, prior_anchor_duration_init=True, \ + prior_duration=0.048) + self.query_embed = nn.Parameter(self.query_embed, requires_grad=True) + else: + self.query_embed = nn.Embedding(num_queries, hidden_dim * 2) + + self.class_head = nn.Linear(hidden_dim, num_classes) + self.class_refine_head = nn.Linear(hidden_dim, num_classes) # For refine pseudo box if use additional score layer + self.count_head = nn.Linear(hidden_dim, opt.max_eseq_length + 1) + self.bbox_head = MLP(hidden_dim, hidden_dim, 2, 3) + + self.num_feature_levels = num_feature_levels + self.aux_loss = aux_loss + self.with_box_refine = with_box_refine + self.share_caption_head = opt.share_caption_head + + # initialization + prior_prob = 0.01 + bias_value = -math.log((1 - prior_prob) / prior_prob) + self.class_head.bias.data = torch.ones(num_classes) * bias_value + self.class_refine_head.bias.data = torch.ones(num_classes) * bias_value + nn.init.constant_(self.bbox_head.layers[-1].weight.data, 0) + nn.init.constant_(self.bbox_head.layers[-1].bias.data, 0) + + if self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim' \ + or self.opt.use_pseudo_box: + self.load_text_embed = True + else: + self.load_text_embed = False + + + num_pred = transformer.decoder.num_layers + if self.share_caption_head: + print('all decoder layers share the same caption head') + self.caption_head = nn.ModuleList([self.caption_head for _ in range(num_pred)]) + else: + print('do NOT share the caption head') + self.caption_head = _get_clones(self.caption_head, num_pred) + + if self.opt.use_additional_cap_layer: + self.caption_head_refine = _get_clones(captioner, self.opt.refine_pseudo_stage_num) + + if with_box_refine: + self.class_head = _get_clones(self.class_head, num_pred) + self.count_head = _get_clones(self.count_head, num_pred) + self.bbox_head = _get_clones(self.bbox_head, num_pred) + nn.init.constant_(self.bbox_head[0].layers[-1].bias.data[1:], -2) + # hack implementation for iterative bounding box refinement + self.transformer.decoder.bbox_head = self.bbox_head + else: + nn.init.constant_(self.bbox_head.layers[-1].bias.data[1:], -2) + self.class_head = nn.ModuleList([self.class_head for _ in range(num_pred)]) + self.count_head = nn.ModuleList([self.count_head for _ in range(num_pred)]) + self.bbox_head = nn.ModuleList([self.bbox_head for _ in range(num_pred)]) + self.transformer.decoder.bbox_head = None + + self.class_refine_head = _get_clones(self.class_refine_head, self.opt.refine_pseudo_stage_num) + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + if opt.disable_contrastive_projection: + projection_event = nn.Identity() + projection_text = nn.Identity() + else: + projection_event = nn.Linear(hidden_dim, opt.contrastive_hidden_size) + projection_text = 
nn.Linear(text_hidden_dim, opt.contrastive_hidden_size) + self.contrastive_projection_event = nn.ModuleList( + [projection_event for _ in range(num_pred)]) + self.contrastive_projection_text = nn.ModuleList( + [projection_text for _ in range(num_pred)]) + if opt.enable_bg_for_cl: + self.background_embed = nn.Parameter(torch.randn(1, opt.contrastive_hidden_size), requires_grad=True) + else: + self.background_embed = None + + + self.translator = translator + + self.disable_mid_caption_heads = opt.disable_mid_caption_heads + if self.disable_mid_caption_heads: + print('only calculate caption loss in the last decoding layer') + + self.pseudo_boxes = {} + + + def get_filter_rule_for_encoder(self): + filter_rule = lambda x: 'input_proj' in x \ + or 'transformer.encoder' in x \ + or 'transformer.level_embed' in x \ + or 'base_encoder' in x + return filter_rule + + def encoder_decoder_parameters(self): + filter_rule = self.get_filter_rule_for_encoder() + enc_paras = [] + dec_paras = [] + for name, para in self.named_parameters(): + if filter_rule(name): + print('enc: {}'.format(name)) + enc_paras.append(para) + else: + print('dec: {}'.format(name)) + dec_paras.append(para) + return enc_paras, dec_paras + + # def text_encoding(self, text_encoder_input): + # ''' + # Produce the text embedding for each caption + # :param text_encoder_input: a dict of input for text encoder + # ''' + # if self.opt.pretrained_language_model == 'UniVL' or self.opt.use_pseudo_box: + # # breakpoint() + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input, output_all_encoded_layers=True)[0][-1] + # text_embed = text_embed.to(dtype=dtype) # num_sentence, num_word, dim + # attention_mask = text_encoder_input['attention_mask'].unsqueeze(-1).to(dtype=dtype) # num_sentence, num_word, 1 + # attention_mask[:,0,:] = 0. 
# This operation follows from the UniVL + # text_embed = text_embed * attention_mask # num_sentence, num_word, dim + # text_embed = text_embed.sum(dim=1) / attention_mask.sum(dim=1) # num_sentence, dim + # raw_text_embed = text_embed + # # if video_name: + # # text_feature_path = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text' + # # np.save('{}/{}.npy'.format(text_feature_path, video_name), text_embed.detach().cpu().numpy()) + # text_embed = self.contrastive_projection_text[-1](text_embed) + + # else: + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input) + # text_embed = text_embed['pooler_output'].to(dtype=dtype) # num_sentence, dim + # text_embed = self.contrastive_projection_text[-1](text_embed) # num_sentence, dim_contrastive_learning + # # TODO: add more paradigm to generate the text_embedding + + # return text_embed, raw_text_embed + + def forward(self, dt, criterion, contrastive_criterion, eval_mode=False): + transformer_input_type = self.opt.transformer_input_type + vf = dt['video_tensor'] # (N, L, C) + mask = ~ dt['video_mask'] # (N, L) + duration = dt['video_length'][:, 1] + video_name = dt['video_key'][0][2:] + # text_encoder_input = dt['text_encoder_input'] if (self.opt.matcher_type=='DTW' or self.opt.use_pseudo_box) else None + N, L, C = vf.shape + # assert N == 1, "batch size must be 1."s + + srcs, masks, pos = self.base_encoder(vf, mask, duration) + + src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten = self.transformer.prepare_encoder_inputs( + srcs, masks, pos) + memory = self.transformer.forward_encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, + lvl_pos_embed_flatten, mask_flatten) + + two_stage, disable_iterative_refine, proposals, proposals_mask = decide_two_stage(transformer_input_type, + dt, criterion) + if two_stage: + if transformer_input_type == 'prior_proposals': + if self.opt.prior_manner == 'add': + #print('Insert the prior knowledge by adding the prior proposals to the query embed') + init_query_embed = self.query_embed.weight + _, tgt = torch.chunk(init_query_embed, 2, dim=1) + tgt = tgt.unsqueeze(0).expand(N, -1, -1) + init_reference, _, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_proposal( + proposals) + else: + if self.opt.use_anchor: + # tgt = self.tgt_embed.weight + anchor = self.anchor_embed.weight # num_queries, 2 + query_anchor = (self.query_embed, anchor) + proposals_mask = torch.ones(N, self.query_embed.shape[0], device=self.query_embed.device).bool() + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_anchor(memory, query_anchor) + else: + query_embed = self.query_embed.weight + proposals_mask = torch.ones(N, query_embed.shape[0], device=query_embed.device).bool() + 
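The non-two-stage branch above follows the DETR convention: a single `nn.Embedding` of width `2 * hidden_dim` holds both the positional query embedding and the decoder content (`tgt`), which `prepare_decoder_input_query` splits apart (the `prior_manner == 'add'` branch shows the same `torch.chunk` idiom). Below is a minimal sketch of that split with illustrative shapes only; the helper name and batch handling are assumptions, not the repository's actual code:

```python
import torch
import torch.nn as nn

def split_query_embedding(query_embed_weight: torch.Tensor, batch_size: int):
    """Split a [num_queries, 2*hidden_dim] table into (query_pos, tgt),
    each expanded to [batch_size, num_queries, hidden_dim]."""
    query_pos, tgt = torch.chunk(query_embed_weight, 2, dim=1)
    query_pos = query_pos.unsqueeze(0).expand(batch_size, -1, -1)
    tgt = tgt.unsqueeze(0).expand(batch_size, -1, -1)
    return query_pos, tgt

embed = nn.Embedding(100, 2 * 256)  # num_queries=100, hidden_dim=256
query_pos, tgt = split_query_embedding(embed.weight, batch_size=1)
assert query_pos.shape == tgt.shape == (1, 100, 256)
```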
init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_query(memory, + query_embed) + hs, inter_references = self.transformer.forward_decoder(tgt, reference_points, memory, temporal_shapes, + level_start_index, valid_ratios, query_embed, + mask_flatten, proposals_mask, disable_iterative_refine) + # hs: [num_decoder_layer, bs, num_query, feat_dim] + + # breakpoint() + # project to co-embedding space + if self.load_text_embed and eval_mode==False: + # text_embed, raw_text_embed = self.text_encoding(text_encoder_input) + # text_embed = [text_embed] * hs.shape[0] + # text_embed = torch.stack(text_embed, dim=0) + raw_text_embed = dt['cap_embed'] * hs.shape[0]# dt['caption_embedding'] returns a tuple(list) + # text_embed: [num_decoder_layer, num_sentence, contrastive_dim] + event_embed = torch.stack([self.contrastive_projection_event[i](hs_i) for i, hs_i in enumerate(hs)]) + text_embed = torch.stack([self.contrastive_projection_text[j](hs_j.cuda()) for j, hs_j in enumerate(raw_text_embed)]) + # breakpoint() + # event_embed: [num_decoder_layer, num_query, contrastive_dim] + else: + raw_text_embed = None + text_embed = None + event_embed = hs + # breakpoint() + if self.opt.use_pseudo_box and self.training: + # breakpoint() + # print('use pseudo box') + video_frame_num = dt['video_length'][:,0].cpu().numpy() # [feature_len, raw_video_len, video_len] + video_name = dt['video_key'][0] + if self.pseudo_boxes.get(video_name) is not None and 'box' in self.pseudo_boxes[video_name].keys() and 'loss' in self.pseudo_boxes[video_name].keys(): + # if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [self.pseudo_boxes[video_name]['box']] + + else: + if self.opt.pseudo_box_type == 'align': + video_step_segment = [segment_video_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + # elif self.opt.pseudo_box_type == 'similarity': + # video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + # breakpoint() + elif self.opt.pseudo_box_type == "similarity": + # breakpoint() + if self.opt.width_ratio < 0: + video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode) for i in range(N)] + else: + video_step_alignment = [align_frame_into_steps_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=False, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i 
in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v1': + video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v2': + video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'weight_sim': + if self.opt.width_ratio < 0: + video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + else: + # breakpoint() + video_step_alignment = [step_retrieval_weight_sim_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + + elif self.opt.pseudo_box_type == 'weight_index': + video_step_alignment = [step_retrieval_weight_index(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + elif self.opt.pseudo_box_type == 'modeframe': + video_step_alignment = [align_frame_into_steps_mode(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'uniform': + video_step_alignment = [uniform_box(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # breakpoint() + else: + raise NotImplementedError('pseudo_box_type {} is not implemented'.format(self.opt.pseudo_box_type)) + + + if self.opt.pseudo_box_type != 'align': + if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + # breakpoint() + video_step_alignment, loss_op = [out[0] for out in video_step_alignment], [out[1] for out in video_step_alignment] + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0], 'loss': loss_op[0].item()} + else: + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0]} + + if self.opt.pseudo_box_type != 'align': + bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + else: + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + bbox_alignment = to_center_duration(bbox_alignment) + + + for sample in range(len(dt['video_target'])): + dt['video_target'][sample]['boxes_pseudo'] = bbox_alignment[sample] + # dt['video_target'][sample]['boxes'] = bbox_alignment[sample] + # else: + # print('use gt box') + + #breakpoint() + others = {'memory': memory, + 'mask_flatten': mask_flatten, + 'spatial_shapes': temporal_shapes, + 'level_start_index': level_start_index, + 'valid_ratios': valid_ratios, + 'proposals_mask': proposals_mask, + 'text_embed': text_embed, + 'event_embed': event_embed} + # breakpoint() + if 
eval_mode or self.opt.caption_loss_coef == 0: + out, loss = self.parallel_prediction_full(dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + if self.opt.refine_pseudo_box and self.opt.use_pseudo_box: + # print('refine') + out, loss = self.parallel_prediction_refine_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + # print('no refine') + out, loss = self.parallel_prediction_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + return out, loss + + def predict_event_num(self, counter, hs_lid): + hs_lid_pool = torch.max(hs_lid, dim=1, keepdim=False)[0] # [bs, feat_dim] + outputs_class0 = counter(hs_lid_pool) + return outputs_class0 + + def parallel_prediction_full(self, dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + ''' + hs: [decoder_layer, bs, num_query, feat_dim] + init_reference: [bs, num_query, 1] + inter_references: [decoder_layer, bs, num_query, 2] + ''' + outputs_classes = [] + outputs_classes0 = [] + outputs_coords = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + num_pred = hs.shape[0] + #breakpoint() + for l_id in range(hs.shape[0]): + if l_id == 0: + reference = init_reference + else: + reference = inter_references[l_id - 1] # [decoder_layer, batch, query_num, ...] + hs_lid = hs[l_id] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + output_count = self.predict_event_num(self.count_head[l_id], hs_lid) + n_pred_sentence = output_count.argmax(dim=-1).clamp(min=1).item() + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 4] + + # if self.opt.disable_mid_caption_heads and (l_id != hs.shape[0] - 1): + if l_id != hs.shape[0] - 1: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, 'none') + else: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type) # Only output caption in the last decoding layer + + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if self.opt.matcher_type == 'DTW': + assert reference.shape[-1] == 2 and tmp.shape[-1] == 2 + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :2] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 2] + + outputs_classes.append(outputs_class) + outputs_classes0.append(output_count) + outputs_coords.append(outputs_coord) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + output_count = torch.stack(outputs_classes0) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + + all_out = {'pred_logits': outputs_class, + 'pred_count': output_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs} + out = {k: v[-1] for k, v in all_out.items()} + + if self.aux_loss: + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + + # loss, _, _ = criterion(out, 
dt['video_target'], others) + return out, [] + + def parallel_prediction_refine_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + + outputs_classes = [] + outputs_counts = [] + outputs_coords = [] + outputs_cap_costs = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + cl_match_mats = [] + + num_pred = hs.shape[0] + if self.opt.pseudo_box_aug: + assert self.opt.use_pseudo_box + num_sentence = dt['gt_boxes'].size(-2) + assert num_sentence == len(dt['cap_raw'][0]) + if self.opt.pseudo_box_aug_num * num_sentence > self.opt.num_queries: + aug_num = self.opt.num_queries // num_sentence + else: + aug_num = self.opt.pseudo_box_aug_num + if self.opt.refine_pseudo_box: + ori_dt_cap_tensor = copy.deepcopy(dt['cap_tensor']) + ori_dt_cap_mask = copy.deepcopy(dt['cap_mask']) + cap_dim = dt['cap_tensor'].shape[-1] #(num_sen, num_max_word) + dt['cap_tensor'] = dt['cap_tensor'].repeat(1, aug_num).reshape(-1, cap_dim) + dt['cap_mask'] = dt['cap_mask'].repeat(1, aug_num).reshape(-1, cap_dim) + + for l_id in range(num_pred): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[ + l_id - 1] # [decoder_layer, batch, query_num, ...] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + + outputs_class = torch.stack(outputs_classes) # [decoder_layer, 
bs, num_query, N_class] + outputs_count = torch.stack(outputs_counts) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + # outputs_cap_loss = torch.stack(outputs_cap_losses) + + all_out = { + 'pred_logits': outputs_class, + 'pred_count': outputs_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs, + 'cl_match_mats': cl_match_mats} + out = {k: v[-1] for k, v in all_out.items()} + + + # ============================= Refine pseudo box here ================================ + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + mil_dict = {} + bag_score_cache = [] + for stage in range(self.opt.refine_pseudo_stage_num): + # Decay augment ratio as the stage increases + aug_ratio = self.opt.pseudo_box_aug_ratio * (0.5 ** stage) + _, last_indices, aux_indices = criterion(out, dt['video_target'], others, aug_num, aug_ratio) + # Only use the last decoder layer output to conduct the pseudo box refinement + hs_lid = hs[-1] + reference = inter_references[-1] #[1, num_query, 2] + indices = last_indices[0] # [tensor(): num_matched_query ,tensor(): num_matched_cap] + query_indices = indices[0][0] # the indices of matched query is ordered + cap_indices = indices[0][1] # the indices of matched sentence is unordered + # breakpoint() + # num_sentence = cap_indices.size(0) // self.opt.pseudo_box_aug_num + cap_sort = torch.sort(cap_indices)[1] + reorder_query_indices = query_indices[cap_sort] + if self.opt.use_neg_pseudo_box: + neg_query_indices = [] + neg_cap_indices = torch.arange(0,cap_indices.size(0),aug_num).view(num_sentence,-1).repeat(1,self.opt.num_neg_box).view(-1) + for i in range(num_sentence): + # select some negetive indices from reordered query indices + candidates_r = (reorder_query_indices[(i+1)*aug_num:]) + candidates_l = (reorder_query_indices[:(i)*aug_num]) + if (candidates_r.size(0) > 0) and (candidates_l.size(0) > 0): + candidates = torch.cat((candidates_r, candidates_l)) + else: + candidates = candidates_r if candidates_r.size(0) > 0 else candidates_l + if candidates.size(0) == 0: + candidates = reorder_query_indices + if candidates.size(0) < self.opt.num_neg_box: + random_selected_indices = torch.randperm(candidates.size(0)) + padding_num = self.opt.num_neg_box - candidates.size(0) + random_selected_indices = torch.cat((random_selected_indices, random_selected_indices[:padding_num])) + else: + random_selected_indices = torch.randperm(reorder_query_indices.size(0)-aug_num)[:self.opt.num_neg_box] + neg_query_indices.append(candidates[random_selected_indices]) + neg_query_indices = torch.cat(neg_query_indices) + neg_indices = [(neg_query_indices, neg_cap_indices)] + # query_indices: ordered, cap_indices: unordered + # ++++++ <1>. 
Produce the instance score and classification score + if self.opt.use_additional_cap_layer: + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + if (stage > 0) and self.opt.use_neg_pseudo_box: + _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, neg_indices) + else: + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + if (stage > 0) and self.opt.use_neg_pseudo_box: + _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, neg_indices) + # breakpoint() + # sentence_cap_prob: the caption probility for each matched query torch.Size([num_matched_query]) + if self.opt.use_additional_score_layer: + query_ins_score = self.class_refine_head[stage](hs_lid)[:, query_indices, :] + else: + query_ins_score = outputs_classes[-1][:, query_indices, :] # [1, num_matched_query, 1] + query_pred_boxes = outputs_coord[-1][:, query_indices, :] # [1, num_matched_query, 2] + query_pred_boxes = query_pred_boxes[0,:,:][cap_sort].view(-1, 2) # [num_matched_query, 2] + # breakpoint() + try: + query_ins_score = query_ins_score[0,cap_sort,0].view(-1, aug_num) # [num_cap, num_aug] + except: + breakpoint() + if self.opt.norm_ins_score == 'softmax': + query_ins_score = torch.softmax(query_ins_score, dim=-1) + elif self.opt.norm_ins_score == 'sigmoid': + query_ins_score = query_ins_score.sigmoid() + else: + raise NotImplementedError + + # breakpoint() + # sentence_cap_score = cap_probs['cap_prob_train'] + temperature = 2 + sentence_cap_prob = sentence_cap_prob[cap_sort].view(-1, aug_num) # [num_cap, num_aug] + cap_len = torch.tensor([len(cap.split()) for cap in dt['cap_raw'][0]], device=sentence_cap_prob.device).unsqueeze(1) + sentence_cap_score = (sentence_cap_prob / cap_len) ** temperature + 1e-5 + + sentence_cap_score[torch.isinf(sentence_cap_score)] = 1e8 + + sentence_cap_score = sentence_cap_score.detach() + query_ins_score = query_ins_score.detach() + + # breakpoint() + query_score = sentence_cap_score + query_ins_score + # sentence_score = + # if (stage == 0) or (self.opt.focal_mil == False): + # sentence_cap_prob = torch.softmax(sentence_cap_prob, dim=-1) # Softmax over queries in the same bag + # else: + # sentence_cap_prob = sentence_cap_prob.sigmoid() + + # if self.opt.cap_prob_clip: + # query_score = sentence_cap_prob.detach() * query_ins_score # [num_cap, num_aug] + # else: + # query_score = sentence_cap_prob * query_ins_score # [num_cap, num_aug] + + # # ++++++ <2>. 
Calculate the MIL loss and Neg loss + bag_score = query_score.sum(dim=-1) # [num_cap] + bag_score = bag_score.clamp(0,1) + bag_score_cache.append(bag_score) + mil_weight = bag_score_cache[stage-1] if self.opt.weighted_mil_loss else torch.ones_like(bag_score).to(bag_score.device) + if stage > 0: + if self.opt.focal_mil: + focal_weight = (torch.ones_like(bag_score).to(bag_score.device) - bag_score).pow(2) + mil_loss = - focal_weight * (bag_score + 1e-6).log() + mil_loss = (mil_weight * mil_loss).mean() + else: + # breakpoint() + mil_loss = - (mil_weight * bag_score.log()).mean() + if self.opt.use_neg_pseudo_box: + neg_cap_prob = neg_cap_prob.sigmoid() + neg_loss = - ((neg_cap_prob).pow(2) * (1- neg_cap_prob).log()).view(num_sentence,-1).mean(dim=-1) + neg_loss = (mil_weight * neg_loss).mean() + mil_loss += neg_loss + else: + mil_loss = F.binary_cross_entropy(bag_score, torch.ones_like(bag_score).to(bag_score.device)) + if 'loss_mil' in mil_dict.keys(): + mil_dict['loss_mil'] += mil_loss + else: + mil_dict['loss_mil'] = mil_loss + # ++++++ <3>. Merge the pseudo box to generate new pseudo box + if self.opt.merge_criterion == 'cap_topk': + topk_pseudo_scores, topk_pseudo_indices = torch.topk(sentence_cap_score, k=self.opt.merge_k_boxes, dim=-1) # [num_caption, k] + elif self.opt.merge_criterion == 'ins_topk': + topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_ins_score, k=self.opt.merge_k_boxes, dim=-1) + elif self.opt.merge_criterion == 'ins_cap_topk': + topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_score, k=self.opt.merge_k_boxes, dim=-1) # [num_caption, k] + else: + raise NotImplementedError('merge_criterion {} is not implemented'.format(self.opt.merge_criterion)) + # breakpoint() + topk_pseudo_scores = topk_pseudo_scores / (topk_pseudo_scores.sum(dim=-1, keepdim=True) + 1e-6) # [num_caption, k] + weight = topk_pseudo_scores.unsqueeze(-1).repeat(1,1,2) # [num_caption, k, 2] + for i in range(len(dt['video_target'])): + previous_pseudo_box = dt['video_target'][i]['box_pseudo_aug'] #[num_caption*num_aug, 2] + if self.opt.use_query_box_for_refine: + # Use the coordinates of query as part of guidance for refinement + previous_pseudo_box = (previous_pseudo_box + query_pred_boxes) / 2 + if self.opt.merge_mode == 'weighted_sum': + # Merge top-k boxes with weighted sum + selected_pseudo_box = torch.gather(previous_pseudo_box.view(-1,aug_num,2), 1, \ + topk_pseudo_indices.unsqueeze(-1).expand(-1,-1,previous_pseudo_box.size(-1))) # [num_caption, k, 2] + refined_pseudo_box = (weight * selected_pseudo_box).sum(dim=1).clamp(0,1) # [num_caption, 2] + dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone() + # I met the following problem with ''targets_cp = copy.deepcopy(targets)'' in criterion.py: + # RuntimeError: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment + # When I tried to conduct the deepcopy operation with the targets which have been updated with 'boxes_pseudo' keys + # So I detach the refined_pseudo_box here to avoid the deepcopy operation here + # Commented by Huabin, 2023/9/14 + elif self.opt.merge_mode == 'interpolate': + # Generate new box with linear interpolation between previous pbox and pbox with max score + max_pseudo_scores = topk_pseudo_scores[:,:1] + max_coef = 0.5 * torch.ones_like(max_pseudo_scores).to(max_pseudo_scores.device) # Set a max coef for box interpolatation + max_pseudo_box = torch.gather(previous_pseudo_box.view(-1,aug_num,2), 1, \ + 
topk_pseudo_indices[:,:1].unsqueeze(-1).expand(-1,-1,previous_pseudo_box.size(-1)))
+                     interpolate_coef = torch.min(max_pseudo_scores, max_coef)
+                     refined_pseudo_box = (1 - interpolate_coef) * previous_pseudo_box[(aug_num-1)::aug_num, :] \
+                                          + interpolate_coef * max_pseudo_box.squeeze(1)
+                     refined_pseudo_box = refined_pseudo_box.clamp(0, 1)
+                     dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone()
+
+         # ++++++ <4>. End of the refinement: restore dt['cap_tensor'] and dt['cap_mask'] to their un-repeated originals
+         dt['cap_tensor'] = ori_dt_cap_tensor
+         dt['cap_mask'] = ori_dt_cap_mask
+         mil_dict['loss_mil'] = mil_dict['loss_mil'] / self.opt.refine_pseudo_stage_num
+         criterion.pseudo_box_aug = False
+         # ================== End of refinement ========================================
+         if self.aux_loss:
+             ks, vs = list(zip(*(all_out.items())))
+             out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)]
+             loss, last_indices, aux_indices = criterion(out, dt['video_target'], others)
+             if self.opt.disable_rematch:
+                 # Disable re-matching and directly use the indices with max score in the last stage of refinement
+                 selected_indices = query_score.argmax(dim=-1).unsqueeze(-1)
+                 query_indices_in_refine = reorder_query_indices.to(selected_indices.device).view(-1, aug_num)
+                 query_indices_in_refine = query_indices_in_refine.gather(1, selected_indices)
+                 query_indices_in_refine, index_sort = torch.sort(query_indices_in_refine, 0)
+                 cap_indices_in_refine = last_indices[0][0][1].sort()[0]
+                 last_indices = [[(query_indices_in_refine.view(-1), cap_indices_in_refine[index_sort.view(-1)])], last_indices[1]]
+             loss.update(mil_dict)
+             criterion.pseudo_box_aug = True
+             for l_id in range(hs.shape[0]):
+                 hs_lid = hs[l_id]
+                 reference = init_reference if l_id == 0 else inter_references[l_id - 1]
+                 indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0]
+                 cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference,
+                                                                                       others, self.opt.caption_decoder_type, indices)
+                 l_dict = {'loss_caption': cap_loss}
+                 if l_id != hs.shape[0] - 1:
+                     l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()}
+                 loss.update(l_dict)
+             out.update({'caption_probs': cap_probs, 'seq': seq})
+         else:
+             loss, last_indices = criterion(out, dt['video_target'], others)
+             criterion.pseudo_box_aug = True
+             l_id = hs.shape[0] - 1
+             reference = inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
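For reference, the `weighted_sum` merge in the refinement block above reduces to a score-weighted average of the top-k augmented boxes in (center, duration) space. A self-contained sketch under assumed shapes (`num_cap` captions with `aug_num` augmented boxes each; names are illustrative, not the repository's API):

```python
import torch

def merge_topk_boxes(scores: torch.Tensor, boxes: torch.Tensor, k: int) -> torch.Tensor:
    """scores: [num_cap, aug_num]; boxes: [num_cap, aug_num, 2] in (center, duration).
    Returns score-weighted merged boxes of shape [num_cap, 2]."""
    topk_scores, topk_idx = torch.topk(scores, k=k, dim=-1)                 # [num_cap, k]
    weights = topk_scores / (topk_scores.sum(dim=-1, keepdim=True) + 1e-6)  # normalize per caption
    selected = torch.gather(boxes, 1, topk_idx.unsqueeze(-1).expand(-1, -1, 2))  # [num_cap, k, 2]
    return (weights.unsqueeze(-1) * selected).sum(dim=1).clamp(0, 1)        # [num_cap, 2]

scores = torch.rand(4, 8)   # 4 captions, 8 augmented boxes each
boxes = torch.rand(4, 8, 2)
print(merge_topk_boxes(scores, boxes, k=3).shape)  # torch.Size([4, 2])
```

Since normalized weights keep the result inside the convex hull of the selected candidates, the final `clamp(0, 1)` acts only as a safety net, mirroring the clamp in the code above.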
+ hs_lid = hs[l_id] + indices = last_indices[0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + l_dict = {'loss_caption': cap_loss} + loss.update(l_dict) + + out.pop('caption_losses') + out.pop('caption_costs') + out.update({'caption_probs': cap_probs, 'seq': seq}) + + + return out, loss + + def parallel_prediction_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + + outputs_classes = [] + outputs_counts = [] + outputs_coords = [] + outputs_cap_costs = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + cl_match_mats = [] + + num_pred = hs.shape[0] + + if self.opt.pseudo_box_aug: + assert self.opt.use_pseudo_box + cap_dim = dt['cap_tensor'].shape[-1] # (num_sen, num_max_word) + dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + + for l_id in range(num_pred): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[ + l_id - 1] # [decoder_layer, batch, query_num, ...] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or not self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + 
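`ContrastiveCriterion.forward_logits`, used above to build `cl_match_mats`, is defined elsewhere in the repository. As a rough illustration only (not the repository's implementation; the temperature value and background handling are assumptions), a text-to-event match matrix of this kind can be computed as temperature-scaled cosine similarity with an optional background embedding appended:

```python
import torch
import torch.nn.functional as F

def match_matrix(text_embed, event_embed, bg_embed=None, temperature=0.1):
    """text_embed: [num_sentence, d]; event_embed: [num_query, d].
    Returns logits of shape [num_query, num_sentence (+1 if bg_embed)]."""
    if bg_embed is not None:
        text_embed = torch.cat([text_embed, bg_embed], dim=0)
    text = F.normalize(text_embed, dim=-1)
    event = F.normalize(event_embed, dim=-1)
    return (event @ text.t()) / temperature

logits = match_matrix(torch.randn(5, 128), torch.randn(100, 128),
                      bg_embed=torch.randn(1, 128))
print(logits.shape)  # torch.Size([100, 6])
```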
outputs_cap_seqs.append(seq)
+
+         outputs_class = torch.stack(outputs_classes)  # [decoder_layer, bs, num_query, N_class]
+         outputs_count = torch.stack(outputs_counts)
+         outputs_coord = torch.stack(outputs_coords)  # [decoder_layer, bs, num_query, 4]
+         # outputs_cap_loss = torch.stack(outputs_cap_losses)
+
+         all_out = {
+             'pred_logits': outputs_class,
+             'pred_count': outputs_count,
+             'pred_boxes': outputs_coord,
+             'caption_probs': outputs_cap_probs,
+             'seq': outputs_cap_seqs,
+             'cl_match_mats': cl_match_mats}
+         out = {k: v[-1] for k, v in all_out.items()}
+
+         if self.aux_loss:
+             ks, vs = list(zip(*(all_out.items())))
+             out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)]
+             if transformer_input_type == 'prior_proposals':
+                 loss, _, _ = criterion(out, dt['video_target'])
+                 # Randomly select one query from each segment interval
+                 num_sentence = dt['cap_tensor'].shape[0]
+                 num_query = hs.shape[-2]
+                 num_query_interval = num_query // num_sentence
+                 query_indices = []
+                 for i in range(num_sentence):
+                     interval_min = i * num_query_interval
+                     interval_max = interval_min + num_query_interval
+                     sample = torch.randint(interval_min, interval_max, (hs.shape[0],))
+                     query_indices.append(sample)
+                 query_indices = torch.cat(query_indices, dim=0)
+                 gt_indices = torch.arange(num_sentence)
+
+                 last_indices = ([(query_indices[::hs.shape[0]], gt_indices)], [None, None])
+                 aux_indices = []
+                 for l_id in range(hs.shape[0]-1):
+                     aux_indices.append(([(query_indices[(l_id+1)::hs.shape[0]], gt_indices)], [None, None]))
+             else:
+                 loss, last_indices, aux_indices = criterion(out, dt['video_target'], others)
+             for l_id in range(hs.shape[0]):
+                 hs_lid = hs[l_id]
+                 reference = init_reference if l_id == 0 else inter_references[l_id - 1]
+                 indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0]
+                 cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference,
+                                                                                       others, self.opt.caption_decoder_type, indices)
+
+                 l_dict = {'loss_caption': cap_loss}
+                 if (self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim'):
+                     contrastive_loss = contrastive_criterion(
+                         text_embed=others['text_embed'][l_id],
+                         event_embed=others['event_embed'][l_id],
+                         matching_indices=indices,
+                         bg_embed=self.background_embed,
+                     )
+
+                     l_dict.update({'contrastive_loss': contrastive_loss})
+                 if l_id != hs.shape[0] - 1:
+                     l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()}
+                 loss.update(l_dict)
+             out.update({'caption_probs': cap_probs, 'seq': seq})
+         else:
+             loss, last_indices = criterion(out, dt['video_target'], others)
+
+             l_id = hs.shape[0] - 1
+             reference = inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
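The `prior_proposals` branch above replaces Hungarian matching by pairing each sentence with one query drawn uniformly at random from its own contiguous interval of the query axis, independently per decoder layer. A standalone sketch of that sampling (function name and shapes are illustrative assumptions):

```python
import torch

def sample_queries_per_segment(num_queries: int, num_sentence: int, num_layers: int) -> torch.Tensor:
    """Split the query axis into num_sentence equal intervals and draw one
    query index per interval for each decoder layer.
    Returns indices of shape [num_sentence, num_layers]."""
    interval = num_queries // num_sentence
    samples = []
    for i in range(num_sentence):
        lo, hi = i * interval, (i + 1) * interval
        samples.append(torch.randint(lo, hi, (num_layers,)))
    return torch.stack(samples)

idx = sample_queries_per_segment(num_queries=100, num_sentence=5, num_layers=6)
print(idx.shape)  # torch.Size([5, 6])
```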
+ hs_lid = hs[l_id] + indices = last_indices[0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + l_dict = {'loss_caption': cap_loss} + loss.update(l_dict) + + out.pop('caption_losses') + out.pop('caption_costs') + out.update({'caption_probs': cap_probs, 'seq': seq}) + + return out, loss + + def caption_prediction(self, cap_head, dt, hs, reference, others, captioner_type, indices=None): + N_, N_q, C = hs.shape + # all_cap_num = len(dt['cap_tensor']) + # if self.opt.pseudo_box_aug: + # assert self.opt.use_pseudo_box + # cap_dim = dt['cap_tensor'].shape[-1] # (num_sen, num_max_word) + # # breakpoint() + # if indices != None: + # breakpoint() + # dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + # dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + all_cap_num = len(dt['cap_tensor']) + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + if indices == None: + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx=row_idx + query_nums[i] + col_idx= col_idx + gt_nums[i] + + bigids = mix_mask.nonzero(as_tuple=False) + feat_bigids, cap_bigids = bigids[:, 0], bigids[:, 1] + else: + # breakpoint() + feat_bigids = torch.zeros(sum([len(_[0]) for _ in indices])).long() + cap_bigids = torch.zeros_like(feat_bigids) + total_query_ids = 0 + total_cap_ids = 0 + total_ids = 0 + max_pair_num = max([len(_[0]) for _ in indices]) + new_hr_for_dsa = torch.zeros(N_, max_pair_num, C) # only for lstm-dsa + cap_seq = dt['cap_tensor'] + new_seq_for_dsa = torch.zeros(N_, max_pair_num, cap_seq.shape[-1], dtype=cap_seq.dtype) # only for lstm-dsa + for i, index in enumerate(indices): + feat_ids, cap_ids = index + feat_bigids[total_ids: total_ids + len(feat_ids)] = total_query_ids + feat_ids + cap_bigids[total_ids: total_ids + len(feat_ids)] = total_cap_ids + cap_ids + new_hr_for_dsa[i, :len(feat_ids)] = hs[i, feat_ids] + new_seq_for_dsa[i, :len(feat_ids)] = cap_seq[total_cap_ids + cap_ids] + total_query_ids += query_nums[i] + total_cap_ids += gt_nums[i] + total_ids += len(feat_ids) + # if self.opt.pseudo_box_aug: + # # Revise the matched targer ids for pseudo box augmentation to caption id + # cap_bigids = cap_bigids // self.opt.pseudo_box_aug_num + cap_probs = {} + flag = True + + if captioner_type == 'none': + cost_caption = torch.zeros(N_, N_q, all_cap_num, + device=hs.device) # batch_size * num_queries * all_caption_num + loss_caption = torch.zeros(N_, N_q, all_cap_num, device=hs.device) + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cost_caption, loss_caption, cap_probs, seq + + elif captioner_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + elif self.opt.caption_decoder_type == 'standard': + # breakpoint() + # assert N_ == 1, 'only support batchsize = 1' + if self.training: + # breakpoint() + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + if 
self.opt.refine_pseudo_box: # Only training and refine_pseudo_box = True returns the raw_cap_prob + cap_prob, raw_cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # shape: [num_sentence, max_num_word, num_vocab] + # cap_prob is log_softmax(prob), raw_cap_prob is (prob) + cap_probs['cap_prob_train'] = cap_prob + cap_probs['raw_cap_prob'] = raw_cap_prob + else: + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608], here 13 is the max length among 5 sentences + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, + dt['cap_tensor'][cap_bigids]) + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + flag = False + pass + + if flag: + clip_ext = clip[feat_bigids] + clip_mask_ext = clip_mask[feat_bigids] + + if self.training: + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq) + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + seq_gt = dt['cap_tensor'][cap_bigids] + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq_gt) + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + + if len(seq): + # re_seq = torch.zeros(N_, N_q, seq.shape[-1]) + # re_cap_prob_eval = torch.zeros(N_, N_q, cap_prob_eval.shape[-1]) + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + if self.opt.caption_cost_type == 'loss': + cap_prob = cap_prob.reshape(-1, cap_prob.shape[-2], cap_prob.shape[-1]) # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608] + caption_tensor = dt['cap_tensor'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + caption_mask = dt['cap_mask'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + cap_loss = cap_head.build_loss(cap_prob, caption_tensor, caption_mask) # [num_query] + cap_cost = cap_loss + else: + raise AssertionError('caption cost type error') + + # Calculate caption probs for each query + # breakpoint() + # if self.opt.refine_pseudo_box: + # sentence_cap_prob = cap_head.build_prob(raw_cap_prob, caption_tensor, caption_mask) + # else: + sentence_cap_prob = - cap_loss + + if indices: + return cap_loss.mean(), cap_probs, seq, sentence_cap_prob + # cap_loss.mean(): [num_matched_query] --> [1], + # cap_probs: dict, contains 'cap_prob_train' or 'cap_prob_eval' [num_matched_query, max_length_sentence, num_word_in_vocab] + # seq: [num_sentence, max_length_sentence+1], here the '+1' means the 1st col is all '0' + + cap_id, query_id = cap_bigids, feat_bigids + cost_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + cost_caption[query_id, cap_id] = cap_cost + loss_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + loss_caption[query_id, cap_id] = cap_loss + cost_caption = cost_caption.reshape(-1, N_q, + max(cap_id) + 1) # batch_size * num_queries * all_caption_num + loss_caption = loss_caption.reshape(-1, N_q, max(cap_id) + 1) + return cost_caption, loss_caption, cap_probs, seq + + def caption_prediction_eval(self, cap_head, dt, hs, reference, others, 
decoder_type, pred_num=None, indices=None): + assert indices == None + N_, N_q, C = hs.shape + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx = row_idx + query_nums[i] + col_idx = col_idx + gt_nums[i] + + cap_probs = {} + + if decoder_type in ['none']: + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cap_probs, seq + + elif decoder_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + elif decoder_type in ['standard']: + assert N_ == 1, 'only support batchsize = 1' + with torch.no_grad(): + if self.opt.transformer_input_type == 'prior_proposals': + # hs: [bs, num_query, feat_dim] + # reference: [bs, num_query, 2] + if pred_num: + num_cap = pred_num + else: + num_cap = dt['cap_tensor'].shape[0] + interval = N_q // num_cap + pool_layer = torch.nn.AvgPool1d(interval,stride=interval) + hs = pool_layer(hs.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # [batch, num_sentence, dim] + reference = pool_layer(reference.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # # [batch, num_sentence, 2] + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, num_cap, seq.shape[-1]) # + cap_prob_eval = cap_prob_eval.reshape(-1, num_cap, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + else: + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) # + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + return cap_probs, seq + + +class PostProcess(nn.Module): + """ This module converts the model's output into the format expected by the coco api""" + + def __init__(self, opt): + super().__init__() + self.opt = opt + + @torch.no_grad() + def forward(self, outputs, target_sizes, loader): + """ Perform the computation + Parameters: + outputs: raw outputs of the model + target_sizes: tensor of dimension [batch_size] containing the size of each video of the batch + """ + out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes'] + N, N_q, N_class = out_logits.shape + assert len(out_logits) == len(target_sizes) + prob = out_logits.sigmoid() # batch, num_queries, 1 + + if self.opt.transformer_input_type == 'prior_proposals': + #topk_values = prob.view(N, N_q) + #topk_indexes = torch.arange(N_q, device=prob.device).unsqueeze(0).repeat(N, 1) + topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), outputs['seq'].shape[1], dim=1) + else: + topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), N_q, dim=1) + scores = topk_values + # topk_boxes = topk_indexes // out_logits.shape[2] + topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode='floor') + labels = 
topk_indexes % out_logits.shape[2]
+         boxes = box_ops.box_cl_to_xy(out_bbox)
+         raw_boxes = copy.deepcopy(boxes)
+         boxes[boxes < 0] = 0
+         boxes[boxes > 1] = 1
+         boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 2))
+
+         scale_fct = torch.stack([target_sizes, target_sizes], dim=1)
+         boxes = boxes * scale_fct[:, None, :]
+         seq = outputs['seq']  # [batch_size, num_queries, max_Cap_len=30]
+         cap_prob = outputs['caption_probs']['cap_prob_eval']  # [batch_size, num_queries]
+         eseq_lens = outputs['pred_count'].argmax(dim=-1).clamp(min=1)
+
+         if len(seq):
+             mask = (seq > 0).float()
+             # cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float') / (
+             #     1e-5 + mask.sum(2).cpu().numpy().astype('float'))
+             cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float')
+             seq = seq.detach().cpu().numpy().astype('int')  # (eseq_batch_size, eseq_len, cap_len)
+             caps = [[loader.dataset.translator.rtranslate(s) for s in s_vid] for s_vid in seq]
+             if self.opt.transformer_input_type != 'prior_proposals':
+                 # Re-arrange the caption order according to the logits
+                 caps = [[caps[batch][idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+                 cap_scores = [[cap_scores[batch, idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+         else:
+             bs, num_queries = boxes.shape[:2]
+             cap_scores = [[-1e5] * num_queries] * bs
+             caps = [[''] * num_queries] * bs
+
+         results = [
+             {'scores': s, 'labels': l, 'boxes': b, 'raw_boxes': rb, 'captions': c, 'caption_scores': cs, 'query_id': qid,
+              'vid_duration': ts, 'pred_seq_len': sl} for s, l, b, rb, c, cs, qid, ts, sl in
+             zip(scores, labels, boxes, raw_boxes, caps, cap_scores, topk_boxes, target_sizes, eseq_lens)]
+         return results
+
+
+ class MLP(nn.Module):
+     """ Very simple multi-layer perceptron (also called FFN)"""
+
+     def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+         super().__init__()
+         self.num_layers = num_layers
+         h = [hidden_dim] * (num_layers - 1)
+         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+
+     def forward(self, x):
+         for i, layer in enumerate(self.layers):
+             x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
+         return x
+
+
+ def build(args):
+     device = torch.device(args.device)
+     base_encoder = build_base_encoder(args)
+     # For text encoder when using DTW matcher
+     # if args.matcher_type == 'DTW' or args.use_pseudo_box:
+     #     if args.pretrained_language_model == 'UniVL':
+     #         print('Load pretrained UniVL model weights')
+     #         text_encoder = load_pretrained_UniVL()
+     #     else:
+     #         for i in range(10):
+     #             try:
+     #                 text_encoder = AutoModel.from_pretrained(args.pretrained_language_model, cache_dir=args.huggingface_cache_dir)
+     #                 break
+     #             except:
+     #                 print('download error in AutoModel, retry...')
+     #                 time.sleep(1)
+     # else:
+     #     text_encoder = None
+
+     transformer = build_deforamble_transformer(args)
+     captioner = build_captioner(args)
+
+     model = PDVC(
+         base_encoder,
+         transformer,
+         captioner,
+         num_classes=args.num_classes,
+         num_queries=args.num_queries,
+         num_feature_levels=args.num_feature_levels,
+         aux_loss=args.aux_loss,
+         with_box_refine=args.with_box_refine,
+         opt=args
+     )
+
+     matcher = build_matcher(args)
+     if args.matcher_type == 'DTW' and args.use_anchor:
+         weight_dict = {'loss_ce': args.cls_loss_coef,
+                        'loss_bbox': args.bbox_loss_coef,
+                        'loss_giou': args.giou_loss_coef,
+                        'loss_self_iou': args.self_iou_loss_coef,
+                        'loss_ref_rank': args.ref_rank_loss_coef,
+                        'loss_counter': args.count_loss_coef,
+                        'loss_caption':
args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + else: + weight_dict = {'loss_ce': args.cls_loss_coef, + 'loss_bbox': args.bbox_loss_coef, + 'loss_giou': args.giou_loss_coef, + 'loss_counter': args.count_loss_coef, + 'loss_caption': args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + if args.refine_pseudo_box: + weight_dict.update({'loss_mil': args.mil_loss_coef}) + # TODO this is a hack + if args.aux_loss: + aux_weight_dict = {} + for i in range(args.dec_layers - 1): + aux_weight_dict.update({k + f'_{i}': v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + + losses = ['labels', 'boxes', 'cardinality'] + + if args.matcher_type == 'DTW' or args.matcher_type == 'Sim': + criterion = AlignCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = ContrastiveCriterion(temperature=args.contrastive_loss_temperature, + enable_cross_video_cl=args.enable_cross_video_cl, + enable_e2t_cl = args.enable_e2t_cl, + enable_bg_for_cl = args.enable_bg_for_cl) + contrastive_criterion.to(device) + else: + criterion = SetCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = None + + criterion.to(device) + postprocessors = {'bbox': PostProcess(args)} + + return model, criterion, contrastive_criterion, postprocessors + + diff --git a/anet_clip/backup/pdvc/position_encoding.py b/anet_clip/backup/pdvc/position_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb71befd6e4397bd4d5a30c7a43861cea158cc7 --- /dev/null +++ b/anet_clip/backup/pdvc/position_encoding.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Various positional encodings for the transformer. +""" +import math +import torch +from torch import nn + +from misc.detr_utils.misc import NestedTensor + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. 
+ """ + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + self.max_duration = 256 + self.duration_embed_layer = nn.Linear(self.max_duration, self.max_duration) + + def forward(self, tensor_list: NestedTensor): + x = tensor_list.tensors + mask = tensor_list.mask + duration = tensor_list.duration + assert mask is not None + not_mask = ~mask + x_embed = not_mask.cumsum(1, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + x_embed = (x_embed - 0.5) / (x_embed[:, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + # dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + dim_t = self.temperature ** (2 * (torch.div(dim_t, 2, rounding_mode='floor')) / self.num_pos_feats) + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + + dur_embed = self.duration_embedding(duration).reshape(-1,1,self.max_duration).expand_as(pos_x) + pos = torch.cat((pos_x, dur_embed), dim=2).permute(0, 2, 1) + return pos + + def duration_embedding(self, durations): + out = torch.zeros(len(durations), self.max_duration, device=durations.device) + durations = durations.int() + for ii in range(len(durations)): + out[ii, :durations[ii]] = 1 + out = self.duration_embed_layer(out) + return out + + + +def build_position_encoding(position_embedding, N_steps): + if position_embedding in ('v2', 'sine'): + # TODO find a better way of exposing other arguments + position_embedding = PositionEmbeddingSine(N_steps, normalize=True) + else: + raise ValueError(f"not supported {position_embedding}") + + return position_embedding diff --git a/anet_clip/backup/pdvc/util.py b/anet_clip/backup/pdvc/util.py new file mode 100644 index 0000000000000000000000000000000000000000..7e489c1bce356a96116e2c13fcabc1c84d132711 --- /dev/null +++ b/anet_clip/backup/pdvc/util.py @@ -0,0 +1,72 @@ +import torch +import numpy as np + +# def find_center_index(array: np.ndarray) -> np.ndarray: +# """ +# Given a array with shape [steps, topk], find the center index between topk indexes +# which has the minimal average distance with other indexes. + +# Args: +# - array: numpy array representing the input array with shape [steps, topk] + +# Returns: +# - center_indexes: numpy array of center indexes for each step +# """ + +# distances = np.sum(np.abs(array[:, np.newaxis, :] - array[:, :, np.newaxis]), axis=2) +# center_indexes = np.argmin(distances, axis=1) + +# return center_indexes + +def find_center_value(arr): + # Compute pairwise distances between all values + distances = np.abs(arr[:, np.newaxis] - arr[np.newaxis, :]) + + # Sum distances for each value + sum_distances = np.sum(distances, axis=1) + + # Find the index of the value with the smallest sum distance + center_index = np.argmin(sum_distances) + + # Get the center value + center_value = arr[center_index] + + return center_value + + +def compute_overlap(center_t, boundary_t, center_t_minus_1, boundary_t_minus_1): + """ + Compute the overlap of boundaries between time t and t-1 for each element in the arrays. 
+ + Args: + - center_t: numpy array representing the center at time t with shape [N,] + - boundary_t: numpy array representing the boundary at time t with shape [N,1, candidates] + - center_t_minus_1: numpy array representing the center at time t-1 with shape [N,] + - boundary_t_minus_1: numpy array representing the boundary at time t-1 with shape [N,] + + Returns: + - overlap: numpy array representing the overlap of boundaries with shape [N,] + """ + + boundary_t = boundary_t.squeeze(1) + boundary_t_minus_1 = boundary_t_minus_1.squeeze(1) + center_t = center_t[:, np.newaxis] + # breakpoint() + center_t_minus_1 = center_t_minus_1[:, np.newaxis] + # boundary_t_minus_1 = boundary_t_minus_1[:, np.newaxis] + + + # Calculate the start and end positions of the boundaries at time t and t-1 + start_t = center_t - 0.5 * boundary_t + end_t = center_t + 0.5 * boundary_t + start_t_minus_1 = center_t_minus_1 - 0.5 * boundary_t_minus_1 + end_t_minus_1 = center_t_minus_1 + 0.5 * boundary_t_minus_1 + + # Calculate the intersection and union of the boundaries + intersection = np.maximum(0, np.minimum(end_t, end_t_minus_1) - np.maximum(start_t, start_t_minus_1)) + union = boundary_t + boundary_t_minus_1 - intersection + + # Compute the overlap using the Intersection over Union (IoU) formula + overlap = intersection / union + + return overlap \ No newline at end of file diff --git a/anet_clip/backup/pdvc/video_segmentation.py b/anet_clip/backup/pdvc/video_segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..93775df585e53204022fceb86c693516386b6023 --- /dev/null +++ b/anet_clip/backup/pdvc/video_segmentation.py @@ -0,0 +1,917 @@ +import torch +import numpy as np + + +from pdvc.dp.exact_dp import drop_dtw, double_drop_dtw +from pdvc.dp.dp_utils import compute_sim +import statistics +from sklearn.cluster import KMeans +from pdvc.util import find_center_value, compute_overlap +# from config import CONFIG + +''' configs of original file ''' +config_eval_l2norm = True +config_eval_keep_percentile = 0.48 +config_eval_fixed_drop_sim = -1 + + +''' +return value: +frame features: [num_frames, feature_dim] -> optimal_assignment: [num_steps], -1 means no match, otherwise means the index of the matched step/caption/query + +''' +# filter_threshold = 0.5 + +def clip_array(arr, threshold): + clipped_arr = np.where(arr > threshold, arr, threshold) + return clipped_arr + + +# def compute_filtered_indices(topk_indices_list, topk_values_list, scale=0.5): +# # center_indices = [] +# # boundary_widths = [] +# filtered_indices_list = [] +# for topk_indices, topk_values in zip(topk_indices_list, topk_values_list): +# center_index = find_center_value(topk_indices) +# std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / sum(topk_values)) ** 0.5 +# boundary_width = std_index * scale +# filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width] +# filtered_indices_list.append(filtered_indices) +# # center_indices.append(center_index) +# # boundary_widths.append(boundary_width) + +# return filtered_indices_list + +def compute_filtered_indices(topk_indices, topk_values, threshold=0.5): + center_index = find_center_value(np.array(topk_indices)) + std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / (sum(topk_values) + 1e-5)) ** 0.5 + boundary_width = std_index * threshold + filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width] + return filtered_indices + +def compute_bbox_loss(index_list, box, similarity_values): 
+def compute_bbox_loss(index_list, box, similarity_values):
+    left, right = box
+    distances = []
+
+    for i, index in enumerate(index_list):
+        if left <= index <= right:
+            distance = -min(index - left, right - index)
+        else:
+            distance = max(left - index, index - right)
+
+        weighted_distance = similarity_values[i] * distance
+        distances.append(weighted_distance)
+
+    return sum(distances)
+
+
+def remove_outliers(indices, threshold, mode, w):
+    # Compute the mean (always needed for the standard deviation below) and the center of the
+    # indices, which is either their median or their windowed mode
+    mean = sum(indices) / len(indices)
+    if mode == 'median':
+        median = statistics.median(indices)
+    elif mode == 'mode':
+        count_dict = {}
+        for p in range(min(indices), max(indices) + 1):
+            # print(p)
+            count = sum(1 for c in indices if p - w <= c <= p + w)
+            count_dict[p] = count
+
+        max_count = max(count_dict.values())
+        best_p_values = [p for p, count in count_dict.items() if count == max_count]
+        if len(best_p_values) % 2 == 0:
+            best_p_values.pop()
+
+        mode_value = statistics.median(best_p_values)
+    std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5
+
+    # Calculate the threshold for identifying outliers
+    threshold_value = threshold * std_dev
+
+    # Filter out indices that are far from the center
+    # breakpoint()
+    if mode == 'median':
+        filtered_indices = [i for i in indices if abs(i - median) <= threshold_value]
+    elif mode == 'mode':
+        filtered_indices = [i for i in indices if abs(i - mode_value) <= threshold_value]
+    return filtered_indices
+
+
+def remove_outliers_v1(indices, threshold):
+    pass
+
+def get_mode(indices, w):
+    count_dict = {}
+    for p in range(min(indices), max(indices) + 1):
+        # print(p)
+        count = sum(1 for c in indices if p - w <= c <= p + w)
+        count_dict[p] = count
+
+    max_count = max(count_dict.values())
+    best_p_values = [p for p, count in count_dict.items() if count == max_count]
+    if len(best_p_values) % 2 == 0:
+        best_p_values.pop()
+
+    mode_value = statistics.median(best_p_values)
+    return mode_value
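get_mode implements a windowed mode: every candidate position is scored by how many of the indices fall within ±w of it, and ties are resolved by taking the median of the best-scoring positions. A quick self-contained check of that rule (it mirrors the logic above rather than importing it):

from statistics import median

# Windowed mode, mirroring get_mode(): score each position p by how many
# indices land inside [p - w, p + w]; break ties with the median position.
def windowed_mode(indices, w):
    scores = {p: sum(1 for c in indices if p - w <= c <= p + w)
              for p in range(min(indices), max(indices) + 1)}
    best = max(scores.values())
    best_p = [p for p, s in scores.items() if s == best]
    if len(best_p) % 2 == 0:   # keep an odd count so the median is an integer position
        best_p.pop()
    return median(best_p)

print(windowed_mode([3, 4, 5, 20], w=1))  # -> 4: the densest window covers 3..5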
+def get_mode_box(sim, topk, w, ratio):  # topk is typically set to 20, ratio to 1
+    ''' Note: the top-k frames are used when estimating the center because the top-k are the most
+    reliable; once the center is fixed, finding the boundary requires looking at all candidate frames. '''
+    avg_caption_length = sim.shape[1] // sim.shape[0]
+    sorted_idx = torch.argsort(-sim, dim=1)
+    top_indices = sorted_idx[:, :topk]
+    # top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True)
+    # top_indices_half = top_indices[:, :topk//2]
+    top_cap_indices = sorted_idx[:, :avg_caption_length]
+    # sorted_idx = torch.argsort(-sim, dim=1)
+    width = int(ratio * avg_caption_length / 2)  # ratio is typically set to 1
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        # index_list = top_indices[i].tolist()
+        mode_value = get_mode(top_indices[i].tolist(), w)
+        filtered_indices = [idx for idx in top_cap_indices[i].tolist() if abs(idx - mode_value) <= width]
+
+        # if len(filtered_indices) == 0:
+        #     filtered_indices = remove_outliers(sim[i].tolist(), top_indices[i].tolist(), 0.5, mode='median', w=w)
+        #     if len(filtered_indices) == 0:
+        #         bbox.append([0, sim.shape[1] - 1])
+        #         continue
+        if len(filtered_indices) == 0:
+            bbox.append([mode_value - width, mode_value + width])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+def compute_threshold(data, threshold):
+    mean = sum(data) / len(data)
+    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
+    threshold_value = threshold * std_dev
+    return threshold_value
+
+
+# using the similarity as the weight to find the center
+''' Find the center globally, then find the boundary locally:
+    1. find center: take the maximum of the window-summed similarity;
+    2. find boundary: keep the top-k frames that stay within a threshold of that center. '''
+def step_retrieval_weight_sim(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # similarity summed along a sliding window
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+        flag = 1
+    else:
+        flag = 0
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+    # breakpoint()
+
+    # Find the window with the maximum similarity sum for each step
+    _, step_center_frames = window_sums.max(dim=1)
+    step_center_frames = step_center_frames.squeeze()
+
+    if flag == 1:
+        step_center_frames = step_center_frames.unsqueeze(0).tolist()
+    else:
+        step_center_frames = step_center_frames.tolist()
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        threshold_value = compute_threshold(top_indices[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            bbox.append([step_center_frames[i] - w, step_center_frames[i] + w])
+        else:
+            bbox.append([w + min(filtered_indices), w + max(filtered_indices)])
+
+    return bbox
+
+''' TODO: get the right weight using the index '''
+def step_retrieval_weight_index(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_idx = torch.argsort(-similarity_matrix, dim=1)
+    # similarity summed along a sliding window
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+    # breakpoint()
+
+    # Find the window with the maximum similarity sum for each step
+    # NOTE: unlike step_retrieval_weight_sim above, the single-step case is not handled here
+    _, step_center_frames = window_sums.max(dim=1)
+    step_center_frames = step_center_frames.squeeze().tolist()
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        threshold_value = compute_threshold(top_indices[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value]
+        bbox.append([w + min(filtered_indices), w + max(filtered_indices)])
+
+    return bbox
+
+def uniform_box(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    return uniform_boxes
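The window-sum trick above is a length-(2w+1) box filter over the per-frame similarities, implemented with conv1d. A minimal sketch of it, with toy shapes of my own choosing: a "valid" convolution shortens each row by 2w frames, which is why the functions above add w back when converting window positions to frame indices.

import torch
import torch.nn.functional as F

sim = torch.rand(4, 50)                       # [num_steps, num_frames]
w = 2
kernel = torch.ones(1, 1, 2 * w + 1)          # box filter of width 2w+1
window_sums = F.conv1d(sim.unsqueeze(1), kernel).squeeze(1)  # [4, 50 - 2w]
centers = window_sums.argmax(dim=1) + w       # shift back to frame coordinates
print(window_sums.shape, centers)             # torch.Size([4, 46]), 4 center frames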
+
+def align_frame_into_steps(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    average_width = int(sim.shape[1] // sim.shape[0] / 2)
+    # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # bbox = get_mode_box(sim, topk, w, ratio)
+
+    top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True)
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        filtered_indices = remove_outliers(top_indices[i].tolist(), threshold, mode=mode, w=w)
+        if len(filtered_indices) < 2:
+            filtered_indices = remove_outliers(top_indices[i].tolist(), 2 * threshold, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            # fall back to an average-width window around the top-1 frame of this step
+            bbox.append([top_indices[i][0].item() - average_width, top_indices[i][0].item() + average_width])
+            continue
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+# use optimization to compute the pseudo boundary
+def align_frame_into_steps_op(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1):
+    # frame_features: torch.Size([200, 768])
+    augment_ratio_list = np.arange(0.5, 2, 0.1)
+
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    # breakpoint()
+    # [#step, #frame]
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy()
+
+    num_steps, num_frames = similarity_matrix.shape
+
+    # Select top-k frames for each caption [#step, #topk]
+    sorted_indices = np.argsort(similarity_matrix, axis=1)
+    # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:]
+    # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1)
+
+    # Compute center indexes [#step, 1]
+
+    # Update boundary width
+    initial_boundary_width = num_frames / num_steps
+    # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1)
+    # overlap = np.zeros(num_steps)
+
+    for i in range(num_iterations):
+        if i == 0 and not order:
+            boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1)
+            topk_indices = [index[-topk:] for index in sorted_indices]
+            topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)]
+
+            center_indexes = np.array([find_center_value(index) for index in topk_indices])
+            previous_index_center = None
+            # # overlap_weight = 0
+        else:
+            if i == 0:
+                segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int)
+                start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:]
+                start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames)
+                end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames)
+                boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1)
+
+                filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)]
+                if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4:
+                    boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1)
+                    topk_indices = [index[-topk:] for index in sorted_indices]
+                    topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)]
+
+                    center_indexes = np.array([find_center_value(index) for index in topk_indices])
+                    previous_index_center = None
+            else:
+                boundary_width_last = boundary_width.reshape(-1, 1, 1)
+                start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames)
+                end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames)
+
topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + +# use optimization to compute pseudo boundary +def align_frame_into_steps_op_v1(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1): + # frame_features: torch.Size([200, 768]) + augment_ratio_list = np.arange(0.5, 2, 0.1) + + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + # breakpoint() + # [#step, #frame] + similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + 
+ num_steps, num_frames = similarity_matrix.shape + + # Select top-k frames for each caption [#step, #topk] + sorted_indices = np.argsort(similarity_matrix, axis=1) + # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] + # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + + # Compute center indexes [#step, 1] + + + # Update boundary width + initial_boundary_width = num_frames / num_steps # 1 + # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 + # overlap = np.zeros(num_steps) + + for i in range(num_iterations): + if i == 0 and not order: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + # # overlap_weight = 0 + else: + if i == 0: + segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int) + start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:] + start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames) + boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1) + + filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)] + if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + else: + boundary_width_last = boundary_width.reshape(-1, 1, 1) + start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames) + + topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + weight_distance = [clip_array(index_distance[i], 0.5 * boundary_width_candidates[i]) for i in range(len(topk_indices))] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] / weight_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] 
* (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + + + + + +# # use optimization to compute pseudo boundary +# def align_frame_into_steps_op_order(frame_features, step_features, topk=15, threshold=0.5, num_iterations=4, beta=1): +# # frame_features: torch.Size([200, 768]) +# augment_ratio_list = np.arange(0.5, 2, 0.1) + +# if step_features.shape[0] == 0: +# return -np.ones(frame_features.shape[0]) + +# # breakpoint() +# # [#step, #frame] +# similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + +# num_steps, num_frames = similarity_matrix.shape + +# # Select top-k frames for each caption [#step, #topk] +# top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] +# top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + +# # Compute center indexes [#step, 1] +# center_indexes = find_center_index(top_indices)[:, np.newaxis] + +# # Update boundary width +# initial_boundary_width = num_frames / num_steps # 1 +# # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 +# # overlap = np.zeros(num_steps) + +# for i in range(num_iterations): +# if i == 0: +# boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) +# # previous_index_center = None +# # # overlap_weight = 0 +# else: +# boundary_width_last = boundary_width.reshape(-1, 1, 1) +# previous_index_center = center_indexes +# # overlap_weight = 0.5 * np.sum(overlap) + +# boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + +# index_distance = np.abs(top_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + +# 
loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates]
+
+#         if i == 0:
+#             loss = loss_sim  # [#step, #candidates]
+#             print('loss shape:', loss_sim.shape, loss.shape)
+#         else:
+#             # measure the overlap between boundaries given the center and boundary width
+#             overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last)  # [#step, #candidates]
+#             loss = loss_sim + beta * overlap
+#             print('loss shape:', loss_sim.shape, overlap.shape, loss.shape)
+#         # find the best boundary width
+#         # breakpoint()
+#         best_boundary_width = np.argmin(loss, axis=1)  # [#step]
+#         # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape)
+#         boundary_width = boundary_width_candidates[:,0][np.arange(num_steps), best_boundary_width]  # [#step]
+#         # print(boundary_width.shape)
+
+#     return center_indexes, boundary_width
+
+# based on the original version above, but changes how the center and the std are computed
+def align_frame_into_steps_op_order_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_index = torch.argsort(-sim, dim=1)
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])]
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+
+    iter_bbox_loss = {}
+    for iter in range(iteration):
+        if iter == 0:
+            refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+        else:
+            refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio)  # expand the bbox of the previous iteration
+
+        # global: from all frames, local: from the refined uniform boxes
+        top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+        top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])]
+
+        size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+        if sum(size_local) < (topk - 2) * len(size_local):
+            top_indices_list = top_indices_list_global
+            top_values_list = top_values_list_global
+        else:
+            top_indices_list = top_indices_list_local
+            top_values_list = top_values_list_local
+
+        # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+        bbox = []
+        for i in range(len(top_indices_list)):
+            filtered_indices = compute_filtered_indices(top_indices_list[i].tolist(), top_values_list[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                # fall back to the global top-k (note: the similarity values, not the indices, are the weights)
+                filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                bbox.append(uniform_boxes[i])
+                continue
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+        # compute the bbox loss
+        bbox_loss_list = [compute_bbox_loss(top_indices_list[i], bbox[i], top_values_list[i]) for i in range(len(top_indices_list))]
+        bbox_loss = sum(bbox_loss_list)
+        iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox}
+
+    # select the iteration with the minimum bbox loss and output its bbox
+    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
+    min_loss = iter_bbox_loss[min_loss_iter]['loss']
+    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']
+
+    return (best_bbox, min_loss)
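A small worked example of the bbox loss that drives the iteration selection above: frames inside the candidate box contribute a negative cost scaled by how deep inside they sit, frames outside contribute a positive cost, and each term is weighted by its similarity, so lower totals mean the box covers the high-similarity frames more tightly. The numbers here are made up for illustration.

# Worked example of the compute_bbox_loss arithmetic defined earlier in this file.
indices = [10, 12, 30]
box = (8, 15)
sims = [0.9, 0.8, 0.7]

total = 0.0
for idx, s in zip(indices, sims):
    left, right = box
    if left <= idx <= right:
        dist = -min(idx - left, right - idx)   # reward interior frames
    else:
        dist = max(left - idx, idx - right)    # penalize distance outside the box
    total += s * dist
print(total)   # 0.9*(-2) + 0.8*(-3) + 0.7*15 = 6.3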
+
+
+# pseudo box 4: based on a fixed window. The results were poor, so this variant was abandoned.
+def align_frame_into_steps_mode(frame_features, step_features, topk=15, w=2, ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    bbox = get_mode_box(sim, topk, w, ratio)
+    return bbox
+
+def uniform_window(frame_num, step_num):
+    uniform_timestamps = torch.linspace(0, frame_num, step_num + 1)
+    uniform_timestamps = torch.round(uniform_timestamps).int().tolist()
+    bbox = []
+    for i in range(step_num):
+        bbox.append([uniform_timestamps[i], uniform_timestamps[i + 1] - 1])
+
+    # window_size = frame_num // step_num
+    # bbox = []
+    # for i in range(step_num):
+    #     bbox.append([i * window_size, (i + 1) * window_size - 1])
+    # bbox[-1][1] = frame_num - 1
+    return bbox
+
+def expand_window(uniform_bbox, frame_num, step_num, ratio=1):
+    ''' ratio: how far the GT box is allowed to drift from the uniform box; anything beyond this
+    range is treated as impossible. The unit of ratio is one average caption length. '''
+    window_size = frame_num // step_num
+    refined_bbox = []
+    for bbox in uniform_bbox:
+        start = max(0, bbox[0] - ratio * window_size)
+        end = min(frame_num - 1, bbox[1] + ratio * window_size)
+        refined_bbox.append([start, end])
+    return refined_bbox
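Concretely, uniform_window splits the video into equal step-sized windows and expand_window pads each one by `ratio` average caption lengths on both sides, bounding where the true segment may drift. A quick demo with a 100-frame video and 4 steps (the helpers are restated here so the snippet runs on its own):

import torch

def uniform_window(frame_num, step_num):
    ts = torch.round(torch.linspace(0, frame_num, step_num + 1)).int().tolist()
    return [[ts[i], ts[i + 1] - 1] for i in range(step_num)]

def expand_window(boxes, frame_num, step_num, ratio=1):
    win = frame_num // step_num
    return [[max(0, s - ratio * win), min(frame_num - 1, e + ratio * win)]
            for s, e in boxes]

boxes = uniform_window(100, 4)       # [[0, 24], [25, 49], [50, 74], [75, 99]]
print(expand_window(boxes, 100, 4))  # [[0, 49], [0, 74], [25, 99], [50, 99]]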
+
+# pseudo box 3: based on similarity, taking the order of the steps into account
+def align_frame_into_steps_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    # old setting (the index is wrong)
+    # # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])]
+    # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list]
+    # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+
+    sorted_index = torch.argsort(-sim, dim=1)
+    # global: from all frames, local: from the refined uniform boxes
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk - 2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            bbox.append(uniform_boxes[i])
+            continue
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
+
+
+# based on pbox3: if ratio 1 yields enough local candidates, use them; otherwise progressively
+# widen the window, falling back to the global top-k if that still is not enough
+def align_frame_into_steps_order_adapt(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    # old setting (the index is wrong)
+    # # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])]
+    # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list]
+    # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+
+    sorted_index = torch.argsort(-sim, dim=1)
+    # global: from all frames, local: from the refined uniform boxes
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk - 1) * len(size_local):
+        flag = 0
+        for attempt in range(4):  # renamed from `i` to avoid shadowing the step index below
+            refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio + attempt * 0.5)
+            top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+            size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+            if sum(size_local) >= (topk - 1) * len(size_local):
+                flag = 1
+                break
+        if flag == 0:
+            top_indices_list = top_indices_list_global
+        else:
+            top_indices_list = top_indices_list_local
+
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w)
+        if len(filtered_indices) == 0:
+            bbox.append(uniform_boxes[i])
+            continue
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
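The local-versus-global selection shared by the *_order variants above can be isolated into one helper: take the top-k frames restricted to each step's expanded window, and fall back to the unrestricted top-k when the windows are too tight to supply roughly k candidates per step. The sketch below mirrors that logic with names of my own; it is not imported from the repo.

import torch

def topk_local_or_global(sim, boxes, topk=15, slack=2):
    # sim: [num_steps, num_frames]; boxes: one [start, end] window per step
    order = torch.argsort(-sim, dim=1)
    glob = [order[i][:topk] for i in range(sim.shape[0])]
    loc = [order[i][(order[i] >= boxes[i][0]) & (order[i] <= boxes[i][1])][:topk]
           for i in range(sim.shape[0])]
    # keep the local candidates only if, on average, nearly k survive per step
    enough = sum(len(t) for t in loc) >= (topk - slack) * len(loc)
    return loc if enough else glob

sim = torch.rand(3, 60)
boxes = [[0, 19], [20, 39], [40, 59]]
print([t.shape for t in topk_local_or_global(sim, boxes, topk=15)])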
+
+def step_retrieval_weight_sim_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # breakpoint()
+
+    window_sums = torch.nn.functional.conv1d(sim.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+
+    sorted_index = torch.argsort(-window_sums, dim=1) + w
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk - 2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        threshold_value = compute_threshold(top_indices_list[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices_list[i].tolist() if abs(frame - top_indices_list[i][0]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            # fall back to a small window around the top-1 frame of this step
+            bbox.append([top_indices_list[i][0].item() - w, top_indices_list[i][0].item() + w])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
+
+# pseudo box 0: based on (drop-)DTW
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0]  # repeat to [1, N], then take row 0 -> shape [N]
+    # the baseline picks a mid-range value of the similarity matrix as the drop cost, i.e. the value
+    # at which matching a frame and dropping it are considered equally acceptable
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        # directly assign each frame to its best-matching step, which in principle gives a one-to-one matching
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0)
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        # tuning the drop cost adjusts how strict the matching is
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1
+    return optimal_assignment
+
+def align_query_into_steps(query_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(query_features.shape[0])
+
+    sim = compute_sim(step_features, query_features, config_eval_l2norm).cpu()
+    query_features, step_features = query_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0]  # repeat to [1, N], then take row 0 -> shape [N]
+    # the baseline picks a mid-range value of the similarity matrix as the drop cost, i.e. the value
+    # at which matching a query and dropping it are considered equally acceptable
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        # directly take the best match for each query, which in principle gives a one-to-one matching
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0)
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        # tuning the drop cost adjusts how strict the matching is
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, one_to_one=True, return_labels=True) - 1
+    return optimal_assignment
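The percentile-based drop cost used above is worth seeing in isolation: the k-th largest similarity, with k = keep_percentile * numel(sim), becomes the baseline logit, so frames whose similarity falls below it are cheaper to drop than to match. A minimal sketch with toy shapes (drop_dtw itself lives in pdvc.dp.exact_dp and is not reproduced here):

import torch

sim = torch.rand(8, 200)                 # [num_steps, num_frames]
keep_percentile = 0.48
k = max(1, int(sim.numel() * keep_percentile))
baseline_logit = torch.topk(sim.reshape(-1), k).values[-1]   # k-th largest similarity

zx_costs = -sim                                      # per (step, frame) matching costs
drop_costs = -baseline_logit.repeat(sim.shape[1])    # one drop cost per frame
print(float(baseline_logit), drop_costs.shape)       # roughly 0.52 here, torch.Size([200])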
+# at inference time: matching between the video and the predicted slots
+def segment_video_into_slots(video_features, pred_steps):
+    sim = compute_sim(pred_steps, video_features, l2_norm=config_eval_l2norm).detach()
+    if config_eval_fixed_drop_sim == -1:
+        k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+        baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    else:
+        baseline_logit = torch.tensor(config_eval_fixed_drop_sim)
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])  # making it of shape [1, N]
+    x_drop_costs = -baseline_logits.squeeze()
+    zx_costs = -sim
+
+    z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze()
+    zx_costs = zx_costs - z_drop_costs[0].reshape([1, 1])
+    z_drop_costs = z_drop_costs - z_drop_costs[0]
+    x_drop_costs = x_drop_costs - x_drop_costs[0]
+    segmentation = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy(), return_labels=True) - 1
+    return segmentation
+
+
+# get_index and alignment_to_boundary are used by the alignment-based approach
+def get_index(alignment):
+    start_idx, end_idx = [], []
+    for i in range(len(alignment)):
+        if alignment[i] == -1:
+            if i != 0 and alignment[i-1] != -1:
+                end_idx.append(i-1)
+            continue
+        if i == 0:
+            start_idx.append(i)
+        elif alignment[i] != alignment[i-1]:
+            start_idx.append(i)
+            if alignment[i-1] != -1:
+                end_idx.append(i-1)
+        if i == len(alignment) - 1:
+            end_idx.append(i)
+    assert len(start_idx) == len(end_idx)
+    for s, e in zip(start_idx, end_idx):
+        assert alignment[s] <= alignment[e]
+    return start_idx, end_idx
+
+def alignment_to_boundary(alignment, video_frame_num):
+    start_idx, end_idx = get_index(alignment)
+    # get_index returns plain lists, so convert to arrays to make the division elementwise
+    start_time = np.array(start_idx) / video_frame_num
+    end_time = np.array(end_idx) / video_frame_num
+    boundaries = list(zip(start_time, end_time))
+
+    return np.float32(np.stack(boundaries, axis=0))
+
+
+def to_center_duration(alignments):
+    new_alignments = []
+    for alignment in alignments:
+        start, end = alignment[:, 0], alignment[:, 1]
+        center = (start + end) / 2
+        duration = end - start
+        alignment[:, 0], alignment[:, 1] = center, duration
+        new_alignments.append(alignment)
+    return new_alignments
\ No newline at end of file
diff --git a/anet_clip/backup/pdvc/video_segmentation_ori.py b/anet_clip/backup/pdvc/video_segmentation_ori.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d06e59f3b5a80fb4e8a765d20287175b03568d4
--- /dev/null
+++ b/anet_clip/backup/pdvc/video_segmentation_ori.py
@@ -0,0 +1,127 @@
+import torch
+import numpy as np
+import statistics
+
+from pdvc.dp.exact_dp import drop_dtw
+from pdvc.dp.dp_utils import compute_sim
+from sklearn.cluster import KMeans
+
+
+config_eval_l2norm = True
+config_eval_keep_percentile = 0.48  # Calculated from the data
+config_eval_fixed_drop_sim = -1
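Before the alignment helpers below, a worked example of the convention they implement: a per-frame assignment (-1 marks a dropped frame) is converted into per-step [start, end] frame spans. The snippet re-derives the padded-difference trick of get_index_update, defined further down in this file, on a tiny input:

import numpy as np

# A per-frame assignment with three steps and some dropped (-1) frames.
alignment = np.array([-1, 0, 0, 1, -1, 2, 2])

padded = np.append(np.insert(alignment, 0, -1), -1)   # pad both ends with -1
start_idx = np.where(np.diff(padded) > 0)[0]          # label increases -> a segment starts

padded_end = padded.copy()
padded_end[padded_end == -1] = padded_end.max() + 1   # make drops "large" to catch segment ends
end_idx = np.where(np.diff(padded_end) > 0)[0] - 1

print(list(zip(start_idx, end_idx)))   # [(1, 2), (3, 3), (5, 6)]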
+
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, l2_norm=True).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0]  # repeat to [1, N], then take row 0 -> shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0)
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1
+    return optimal_assignment  # [num_frames]
+
+def get_index(alignment):
+    start_idx, end_idx = [], []
+    for i in range(len(alignment)):
+        if alignment[i] == -1:
+            if i != 0 and alignment[i-1] != -1:
+                end_idx.append(i-1)
+            continue
+        if i == 0:
+            start_idx.append(i)
+        elif alignment[i] != alignment[i-1]:
+            start_idx.append(i)
+            if alignment[i-1] != -1:
+                end_idx.append(i-1)
+        if i == len(alignment) - 1:
+            end_idx.append(i)
+    assert len(start_idx) == len(end_idx)
+    for s, e in zip(start_idx, end_idx):
+        assert alignment[s] <= alignment[e]
+    return start_idx, end_idx
+
+def get_index_update(alignment):
+    optimal_alignment = np.append(np.insert(alignment, 0, -1), -1)
+    diff_optimal_alignment = np.diff(optimal_alignment)
+
+    optimal_alignment_end = optimal_alignment.copy()
+    optimal_alignment_end[optimal_alignment_end == -1] = max(optimal_alignment_end) + 1
+    diff_optimal_alignment_end = np.diff(optimal_alignment_end)
+
+    start_idx = np.where(diff_optimal_alignment > 0)[0]
+    end_idx = np.where(diff_optimal_alignment_end > 0)[0] - 1
+    return start_idx, end_idx
+
+def alignment_to_boundary(alignment, video_frame_num):
+    start_idx, end_idx = get_index(alignment)
+    # get_index returns plain lists, so convert to arrays to make the division elementwise
+    start_time = np.array(start_idx) / video_frame_num
+    end_time = np.array(end_idx) / video_frame_num
+    boundaries = list(zip(start_time, end_time))
+
+    return np.float32(np.stack(boundaries, axis=0))
+
+
+def to_center_duration(alignments):
+    new_alignments = []
+    for alignment in alignments:
+        start, end = alignment[:, 0], alignment[:, 1]
+        center = (start + end) / 2
+        duration = end - start
+        alignment[:, 0], alignment[:, 1] = center, duration
+        new_alignments.append(alignment)
+    return new_alignments
+
+
+def remove_outliers(indices, threshold):
+    # Calculate the median, mean, and standard deviation of the indices
+    median = statistics.median(indices)
+    mean = sum(indices) / len(indices)
+    std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5
+
+    # Calculate the threshold for identifying outliers
+    threshold_value = threshold * std_dev
+
+    # Filter out indices that are far from the median
+    filtered_indices = [i for i in indices if abs(i - median) <= threshold_value]
+
+    return filtered_indices
+
+
+def align_frame_into_steps(frame_features, step_features, unordered=False, k=15, threshold=0.5):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, True).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        filtered_indices = remove_outliers(top_indices[i].tolist(), threshold)
+        bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+if __name__ == '__main__':
+    # frame_features = torch.randn(100, 768)
+    # text_features = torch.randn(8, 768)
+    # alignment = segment_video_into_steps(frame_features, text_features)
+    # breakpoint()
+    arr = [-1,-1,0,1,2,2,2,-1,-1,3,4,4,-1,-1,5,5,5,-1,6,6,7,-1,-1, 8, 8, 9]
+    start, end = get_index(arr)
+    start_1, end_1 = get_index_update(arr)
+    # start = [2, 3, 4, 8, 9, 13, 16, 18]
+    # end = [2, 3, 5, 8, 10, 15, 17, 18]
+    breakpoint()
diff --git a/anet_clip/backup/test.py b/anet_clip/backup/test.py
new file mode 100644
index
0000000000000000000000000000000000000000..e1dcf9d7be821a3db142566cb23914ea96f1c064 --- /dev/null +++ b/anet_clip/backup/test.py @@ -0,0 +1,64 @@ +# from pdvc.video_segmentation import align_frame_into_steps_op +# import torch + +# # create two tensors +# frame = torch.rand(200, 768) +# steps = torch.rand(10, 768) + +# bboxs = align_frame_into_steps_op(frame, steps, order=False) +# # breakpoint() +# print('done!') + + +# ================================================================== +# import json + +# filepath = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1/info.json" +# with open(filepath, 'r') as f: +# data = json.load(f) + +# val_history = data['history']['val_result_history'] + +# metric_sum = {} +# metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +# for k, v in val_history.items(): +# metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) +# print(f"{k}: {metric_sum[k]}") + +# best_epoch = max(metric_sum, key=metric_sum.get) +# print(val_history[best_epoch]['eval_score']) +# # write the val_history to a file +# with open('val.log', 'w') as f: +# for k, v in val_history[best_epoch]['eval_score'].items(): +# f.write(f"{k}: {v}\n") +# # print(metric_sum) +# # breakpoint() +# print('done!') + +# ================================================================== +import os +import json +import sys +sys.path.append('/mnt/data/Gvlab/wuhao/code/dibs') +from misc.utils import create_logger +save_folder = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1" + +val_logger = create_logger(save_folder, 'val.log') +infos_path = os.path.join(save_folder, 'info.json') + +with open(infos_path, 'r') as f: + data = json.load(f) +val_history = data['history']['val_result_history'] + +metric_sum = {} +metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + +best_epoch = max(metric_sum, key=metric_sum.get) +best_val_score = val_history[best_epoch]['eval_score'] +val_logger.info(f"Best epoch: {best_epoch}") +print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) +val_logger.info('\nBest Model Performance:\n' + print_info) +val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) \ No newline at end of file diff --git a/anet_clip/backup/train.py b/anet_clip/backup/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8777c91ee32ec28365e2c7579d3d84fab8571135 --- /dev/null +++ b/anet_clip/backup/train.py @@ -0,0 +1,580 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import 
SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mix' in opt.train_caption_file[0]: + elements.append('howto_mix') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mix' in opt.train_caption_file: + elements.append('howto_mix') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + 
if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # try to load saved pbox + saved_path = construct_save_path(opt) + if os.path.exists(saved_path): + try: + with open(saved_path, 'r') as f: + model.pseudo_boxes = json.load(f) + except: + # delete the bad file + os.remove(saved_path) + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + 
model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling 
rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # if dt['video_key'][0] != 'LGArj9Do0xc': + # continue + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + # if trained_samples < 1714: + # trained_samples += 1 + # continue + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + try: + output, loss = model(dt, criterion, contrastive_criterion) + except Exception as e: + print(e) + print(dt['video_key']) + continue + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': 
model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + # breakpoint() + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + + if epoch == 1 and model.pseudo_boxes is not None and 'hyper' not in opt.train_caption_file[0]: + # save the pseudo boxes + pbox_save_path = 
construct_save_path(opt) + if not os.path.exists(pbox_save_path): + with open(pbox_save_path, 'w') as f: + json.dump(model.pseudo_boxes, f) + + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # save the pesudo box + + + + # # ===============================old code============================================== + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + # =================================new code========================================================= + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] + for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) + + break + + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + # breakpoint() + + if opt.gpu_id: + 
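+ # note: CUDA_VISIBLE_DEVICES only takes effect if it is set before the first CUDA context is created; that holds here because train(opt) is only called below, but any torch.cuda call made earlier in the process would ignore this assignment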
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_fewshot.py b/anet_clip/backup/train_fewshot.py new file mode 100644 index 0000000000000000000000000000000000000000..db60bfe68fc32d3da5df89f5af1201a7151a3e8a --- /dev/null +++ b/anet_clip/backup/train_fewshot.py @@ -0,0 +1,482 @@ +# use ft_gt_percent to control the percentage of gt proposals used for finetuning + +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if 
opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_dataset = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if 
opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + 
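+ # worked example of the schedule (hypothetical values, for illustration only): with scheduled_sampling_start=0, scheduled_sampling_increase_every=5, basic_ss_prob=0.0, scheduled_sampling_increase_prob=0.05 and scheduled_sampling_max_prob=0.25, epoch 12 gives frac = (12 - 0) // 5 = 2, hence ss_prob = min(0.0 + 0.05 * 2, 0.25) = 0.10
+ # i.e. ss_prob, the probability that the caption head is fed its own previous prediction instead of the ground-truth token, ramps up in steps and saturates at scheduled_sampling_max_prob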
print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # if dt['video_key'][0] != 'LGArj9Do0xc': + # continue + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + # if trained_samples < 1714: + # trained_samples += 1 + # continue + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + 
len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = 
evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_ft.py b/anet_clip/backup/train_ft.py new file mode 100644 index 0000000000000000000000000000000000000000..bdcc497f763607f28dfb1e0a687705c42e448a09 --- /dev/null +++ b/anet_clip/backup/train_ft.py @@ -0,0 +1,513 @@ +# coding:utf-8 + +''' +train_seq2.py is different from train_seq.py in the following aspects: + +1. train_seq2.py uses the same dataset for pretraining and target task +2. the pretrain dataset and target dataset is not trained one after another in a single epoch. train pretrain dataset for 10 epochs then train target dataset for 20 epochs +3. the vocabulary is always the same for pretrain and target task i.e. combined vocabulary of pretrain and target task +4. checkpoint is located in save howto_yc2_* or howto_tasty_* +5. 
cfg use howto-tasty_tasty_* or howto-yc2_yc2_* +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder): + print('the checkpoint folder does not exist') + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = 
f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history 
= saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + 
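+ # worked example of the milestone computation (hypothetical values): learning_rate_decay_start=8, learning_rate_decay_every=3 and epoch=20 give range(int((20 - 8) / 3)) = range(4), i.e. milestones [8, 11, 14, 17], at which MultiStepLR multiplies the lr by learning_rate_decay_rate
+ # caution: optimizer.load_state_dict above also restores the saved param_groups (including their lr), so the checkpoint's lr overwrites the opt.lr * 0.5 set earlier; if the halved fine-tuning lr is intended, reset optimizer.param_groups[0]['lr'] after loading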
print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + 
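+ # note: losses_log_every = int(len(train_loader_target) / 10) is 0 whenever the loader yields fewer than 10 batches, and the modulo above then raises ZeroDivisionError; max(1, int(len(train_loader_target) / 10)) avoids this edge case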
loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and 
conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq2-ft' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macOS + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_ft2_gt.py b/anet_clip/backup/train_ft2_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..b007713a2ebbdae00dd0edaef54c41a3260279dd --- /dev/null +++ b/anet_clip/backup/train_ft2_gt.py @@ -0,0 +1,587 @@ +# coding:utf-8 + +''' +Similar to train_ft_gt.py: it fine-tunes the model on the target dataset with ground-truth annotations, but the pretraining data includes both the pretraining and the target data (captions only). + +Setting pretrain_data_mode to 'single' makes it behave the same as train_ft_gt.py.
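+(English translation of the note below: use all of the howto subset data for pretraining, then fine-tune with a portion of the ground-truth data.)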
+ +使用全部的howto subset数据进行pretrain, 然后用部分的gt数据进行fine-tune +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + # breakpoint() + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + 
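+ # example (hypothetical folder name): with pretrain_data_mode == 'single', a save folder '.../howto-yc2_yc2_clip_seq2-ft' maps to the pretraining run '.../howto_yc2_clip': replace('howto-yc2_yc2', 'howto_yc2') renames the prefix and re.sub(r"_seq2-ft.*", "", ...) strips the fine-tune suffix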
checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'vlep-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('vlep-yc2_yc2', 'vlep_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + # breakpoint() + + if opt.id_ori != '': + checkpoint_folder = checkpoint_folder + '_' + opt.id_ori + # breakpoint() + # if opt.id == "": + # pass + # else: + # checkpoint_folder = checkpoint_folder + '_' + opt.id + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_es20'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target 
= DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain 
== 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # 
captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # 
tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} 
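+            # Select the best epoch post hoc from info.json by summing the five
+            # validation metrics below. Note (an observation, not from the original
+            # comments): because the logging code above appends each metric's mean
+            # to its per-tIoU list, METEOR/CIDEr/soda_c entries may be lists while
+            # Precision/Recall are scalars, so each entry is reduced to a scalar
+            # before summing.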
+            metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall']
+            for k, v in val_history.items():
+                metric_sum[k] = sum(v['eval_score'][metric][-1] if isinstance(v['eval_score'][metric], list)
+                                    else v['eval_score'][metric] for metric in metrics)
+                # print(f"{k}: {metric_sum[k]}")
+
+            best_epoch = max(metric_sum, key=metric_sum.get)
+            best_val_score = val_history[best_epoch]['eval_score']
+            val_logger.info(f"Best epoch: {best_epoch}")
+            print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()])
+            val_logger.info('\nBest Model Performance:\n' + print_info)
+            val_logger.info('\nBest Overall Score epoch {}: {}\n'.format(best_epoch, metric_sum[best_epoch]))
+
+            break
+
+    return saved_info
+
+
+if __name__ == '__main__':
+    opt = opts.parse_opts()
+    opt.id_ori = opt.id
+
+    opt.id = 'seq2-ft({})-gt_percent-{}'.format(pretrain_data_mode, opt.ft_gt_percent)
+    if opt.id_ori != '':
+        opt.id = opt.id + '_' + opt.id_ori
+    assert 0.0 <= opt.ft_gt_percent <= 1.0
+
+    if not hasattr(opt, 'visual_feature_folder_val'):
+        opt.visual_feature_folder_val = opt.visual_feature_folder
+        opt.text_feature_folder_val = opt.text_feature_folder
+
+    if opt.map:
+        opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder]
+        opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder]
+        opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macOS
+    # breakpoint()
+    train(opt)
+
diff --git a/anet_clip/backup/train_ft_gt.py b/anet_clip/backup/train_ft_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..b481c6eb9a19299b401fbe8ce82d10716a846a7c
--- /dev/null
+++ b/anet_clip/backup/train_ft_gt.py
@@ -0,0 +1,516 @@
+# coding:utf-8
+
+'''
+train_seq2.py differs from train_seq.py in the following aspects:
+
+1. train_seq2.py uses the same dataset for the pretraining and the target task
+2. the pretrain dataset and the target dataset are not interleaved within a single epoch; the pretrain dataset is trained for 10 epochs, then the target dataset for 20 epochs
+3. the vocabulary is always the same for the pretrain and target tasks, i.e. the combined vocabulary of both
+4. checkpoints are saved under howto_yc2_* or howto_tasty_*
+5. cfgs use howto-tasty_tasty_* or howto-yc2_yc2_*
+'''
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+import re
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def _init_fn(worker_id):
+    np.random.seed(12 + worker_id)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_floder(opt)
+    opt.epoch = 20
+    opt.use_pseudo_box = False
+
+    # breakpoint()
+    if 'howto-tasty_tasty' in save_folder:
+        checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty'))  # .replace('_seq2-ft', '')
+    elif 'howto-yc2_yc2' in save_folder:
+        checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2'))  # .replace('_seq2-ft', '')
+    else:
+        print('this script only supports settings named howto-XXX_XXX')
+        exit(1)
+
+    if not os.path.exists(checkpoint_folder):
+        print('the checkpoint folder {} does not exist'.format(checkpoint_folder))
+        exit(1)
+    else:
+        if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')):
+            # print('the checkpoint folder has no val.log, denoting the setting is not fully trained')
+            for i in range(1, 100):
+                if os.path.exists(f'{checkpoint_folder}_{i}'):
+                    if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')):
+                        checkpoint_folder = f'{checkpoint_folder}_{i}'
+                        break
+                    else:
+                        continue
+                else:
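+                    # Fallback checkpoint lookup: when a run is restarted, the save
+                    # folder presumably gets a numeric suffix (_1, _2, ...), and only
+                    # a folder containing val.log corresponds to a fully trained run,
+                    # so the loop above probes the suffixed variants in order and
+                    # stops at the first fully trained one.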
+                    print(f'{checkpoint_folder}_{i} does not exist')
+                    print('the checkpoint folder does not exist')
+                    exit(1)
+
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # # continue training
+    # if opt.start_from:
+    #     opt.pretrain = False
+    #     infos_path = os.path.join(save_folder, 'info.json')
+    #     with open(infos_path) as f:
+    #         logger.info('Load info from {}'.format(infos_path))
+    #         saved_info = json.load(f)
+    #         prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+    #         exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+    #         for opt_name in prev_opt.keys():
+    #             if opt_name not in exclude_opt:
+    #                 vars(opt).update({opt_name: prev_opt.get(opt_name)})
+    #             if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+    #                 logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+    #                                                                vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0],
+        #                                         [opt.visual_feature_folder[0]],
+        #                                         [opt.text_feature_folder[0]],
+        #                                         opt.dict_file, True, 'gt',
+        #                                         opt)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file[1],
+                                              [opt.visual_feature_folder[1]],
+                                              [opt.text_feature_folder[1]],
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
+        # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size,
+        #                                    shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+        # train_dataloaders = [train_loader_pretrain, train_loader_target]
+        # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        # train_dataset.translator = train_dataset_1.translator
+
+    else:
+        print('this script only supports two datasets: one for pretraining and one for the target task')
+        exit(1)
+
+    # The rebuild below on the full target data is disabled: it would
+    # unconditionally overwrite the ft_gt_percent subset loader created above.
+    # train_dataset_target = PropSeqDataset(opt.train_caption_file,
+    #                                       opt.visual_feature_folder,
+    #                                       opt.text_feature_folder,
+    #                                       opt.dict_file, True, 'gt',
+    #                                       opt)
+    # train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+    #                                  shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+    train_dataloaders = [train_loader_target]
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0)
+    best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5)
+    val_result_history = saved_info['history'].get('val_result_history', {})
+    loss_history = saved_info['history'].get('loss_history', {})
lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + 
print_opt(opt, model, logger)
+    print_alert_message('Start training !', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # breakpoint()
+
+    # Epoch-level iteration
+    # opt.use_pseudo_box = False
+
+    while True:
+        if True:  # placeholder scope kept in place of the removed per-dataloader loop (see the commented-out loop below)
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
+
+        # breakpoint()
+        # Batch-level iteration
+        # for train_loader in train_dataloaders:
+        trained_samples = 0
+        for dt in tqdm(train_loader_target, disable=opt.disable_tqdm):
+            # # for fast debugging
+            # if trained_samples > 5:
+            #     break
+            # else:
+            #     trained_samples += 1
+            if opt.device == 'cuda':
+                torch.cuda.synchronize(opt.device)
+            if opt.debug:
+                # each epoch contains fewer mini-batches for debugging
+                if (iteration + 1) % 5 == 0:
+                    iteration += 1
+                    break
+            iteration += 1
+
+            optimizer.zero_grad()
+            dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+            dt['video_target'] = [
+                {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in
+                dt['video_target']]
+
+            # Add text encoder
+            # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+            #     captions = list()
+            #     for video_sents in dt['cap_raw']:  # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+            #         captions.extend(video_sents)
+            #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+            #     text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()}
+            #     # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])}
+            #     # len(text_encoder_input['input_ids']) = n * max_text_input_len
+            #     dt['text_encoder_input'] = text_encoder_input
+
+            # dt = collections.defaultdict(lambda: None, dt)  # Commented to
+
+            output, loss = model(dt, criterion, contrastive_criterion)
+            final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
+            # breakpoint()
+            final_loss.backward()
+            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
+
+            optimizer.step()
+
+            for loss_k, loss_v in loss.items():
+                loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item()
+            loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item()
+
+            if opt.device == 'cuda':
+                torch.cuda.synchronize()
+
+            losses_log_every = max(int(len(train_loader_target) / 10), 1)  # floor at 1 to avoid a modulo-by-zero on loaders with fewer than 10 batches
+
+            if opt.debug:
+                losses_log_every = 6
+
+            if iteration % losses_log_every == 0:
+                end = time.time()
+                for k in loss_sum.keys():
+                    loss_sum[k] = np.round(loss_sum[k] / losses_log_every, 3).item()
+
+                logger.info(
+                    "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}"
+                    .format(opt.id, iteration, epoch, loss_sum,
+                            (end - start) / losses_log_every, bad_video_num))
+
+                tf_writer.add_scalar('lr', opt.current_lr, iteration)
+                for loss_type in loss_sum.keys():
+                    tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration)
+                loss_history[iteration] = loss_sum
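+                # Each loss_history entry holds the window-averaged losses for one
+                # logging window of losses_log_every iterations; the running sums
+                # are reset just below, so consecutive windows do not overlap.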
lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch 
>= opt.epoch:
+            # load Best model and conduct evaluation
+            print('====== Conduct the Final Evaluation to test Best Checkpoint ======')
+            val_logger = create_logger(save_folder, 'val.log')
+            loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda')
+            model.load_state_dict(loaded_pth['model'], strict=True)
+            model.eval()
+            result_json_path = saved_info['best']['result_json_path']
+            eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            if opt.caption_decoder_type == 'none':
+                current_score = 2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])  # harmonic mean (F1) of precision and recall
+            else:
+                if opt.criteria_for_best_ckpt == 'dvc':
+                    current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+                else:
+                    current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter']))
+            val_logger.info('\nBest Model Performance:\n' + print_info)
+            val_logger.info('\nBest Overall Score (iter {}): {}\n'.format(iteration, current_score))
+
+            tf_writer.close()
+            break
+
+    return saved_info
+
+
+if __name__ == '__main__':
+    opt = opts.parse_opts()
+    opt.id = 'seq2-ft-gt_percent-{}'.format(opt.ft_gt_percent)
+    assert 0.0 <= opt.ft_gt_percent <= 1.0
+
+    if not hasattr(opt, 'visual_feature_folder_val'):
+        opt.visual_feature_folder_val = opt.visual_feature_folder
+        opt.text_feature_folder_val = opt.text_feature_folder
+
+    if opt.map:
+        opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder]
+        opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder]
+        opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macOS
+    # breakpoint()
+    train(opt)
+
diff --git a/anet_clip/backup/train_pre_ft_gt.py b/anet_clip/backup/train_pre_ft_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e6c204b58c0fed4cca87004c6816d7830cee1cc
--- /dev/null
+++ b/anet_clip/backup/train_pre_ft_gt.py
@@ -0,0 +1,537 @@
+# coding:utf-8
+
+'''
+Similar to train_ft_gt.py: it fine-tunes the model on the target dataset with
+ground-truth annotations, but the pretraining data includes both the pretrain
+and the target data (captions only).
+
+With pretrain_data_mode set to 'single', it behaves the same as train_ft_gt.py.
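+
+pretrain_data_mode is a module-level constant ('mix', 'seq' or 'single'; see the
+assignment below). Judging from the checkpoint lookup in train(), it only selects
+which pretrained checkpoint folder is loaded: 'mix' keeps the howto-XXX_XXX save
+name, while 'single' maps it back to the plain howto_XXX pretraining folder.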
+ + +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'single' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = 
re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_test'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = 
PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + # breakpoint() + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = 
list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # 
len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = 
{'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + opt.id = 'seq2-pre-{}-ft({})-gt'.format(opt.pre_percent, pretrain_data_mode) + assert opt.pre_percent <= 1.0 and opt.pre_percent >= 0.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in 
opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macOS
+    # breakpoint()
+    train(opt)
+
diff --git a/anet_clip/backup/train_pre_perc.py b/anet_clip/backup/train_pre_perc.py
new file mode 100644
index 0000000000000000000000000000000000000000..909dcdece82848854abf5f774b1d5f848f0a49eb
--- /dev/null
+++ b/anet_clip/backup/train_pre_perc.py
@@ -0,0 +1,484 @@
+# coding:utf-8
+'''
+The cfgs are the same as for train.py, but one extra argument is needed: pre_percent
+Recommended values: 0.1, 0.2, 0.4, 0.6, 0.8, 1
+'''
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # set as an environment variable so it actually takes effect (a bare `CUDA_LAUNCH_BLOCKING = 1` assignment is a no-op)
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+import random
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def seed_worker(worker_id):
+    worker_seed = torch.initial_seed() % 2**32
+    np.random.seed(worker_seed)
+    random.seed(worker_seed)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_folder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = 
{'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_subdata = PercentageSubsetDataset(train_dataset_1, opt.pre_percent) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_subdata, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_all = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_dataset = PercentageSubsetDataset(train_dataset_all, opt.pre_percent) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from 
{}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Start training!', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob =
min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 
'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] 
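+ # Final evaluation reuses the checkpoint-selection score of the per-epoch validation: + # with no caption decoder it is the F1 (harmonic mean) of localization Precision and Recall, 2/(1/P + 1/R); + # with criteria 'dvc' it is mean(METEOR) + mean(soda_c); + # otherwise it is the paragraph-level mean(para_METEOR) + mean(para_CIDEr) + mean(para_Bleu_4).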
+ eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-pre_perc-{}'.format(opt.pre_percent) + assert opt.pre_percent <= 1.0 and opt.pre_percent >= 0.0 + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_seq.py b/anet_clip/backup/train_seq.py new file mode 100644 index 0000000000000000000000000000000000000000..6a415e180bf2506f1cbef5ce6d0f6f4205e76203 --- /dev/null +++ b/anet_clip/backup/train_seq.py @@ -0,0 +1,457 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_floder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup environment completed!') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target]
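+ # Sequential training scheme: the loaders in train_dataloaders are iterated in order within every epoch, + # so each epoch first passes over the pretraining data (folder 0) and then over the target data (folder 1); + # with a single feature folder, only the target loader runs.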
+ + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / 
opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Start training!', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 25: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains fewer mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug:
+ losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = 
{'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-train' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/backup/train_seq_gt.py b/anet_clip/backup/train_seq_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..235ae3a83169787f2b2db87e71f0fabe2dbc2dc1 --- /dev/null +++ b/anet_clip/backup/train_seq_gt.py @@ -0,0 +1,480 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path 
import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_floder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup environment completed!') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # Create the dataset with the specified percentage + subset_data =
PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + + # # Create a DataLoader for the subset dataset + # subset_dataloader = DataLoader(subset_data, batch_size=64, shuffle=True) + + train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print(f'this script only supports two-dataset training, but {len(opt.visual_feature_folder)} dataset folder(s) were provided') + exit(1) + # NOTE: the single-dataset branch below is unreachable after exit(1); kept from train_seq.py + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in
model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Start training!', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + opt.use_pseudo_box = False # True for howto, False for yc2/tasty, + opt.pseudo_box_aug = False + opt.refine_pseudo_box = False + # breakpoint() + + for train_loader in train_dataloaders: + opt.use_pseudo_box = not opt.use_pseudo_box
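+ # The two loaders alternate within each epoch: the toggle above turns pseudo boxes on for the first + # (HowTo-style pretraining) loader and off for the second (target data with GT annotations); + # the updated flag is propagated to the criterion and its matcher below.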
+ criterion.opt = opt + criterion.matcher.use_pseudo_box = opt.use_pseudo_box + + # if opt.use_pseudo_box: + # print('howto dataset') + # else: + # print('target dataset') + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 25: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = 
evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': 
+ current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-gt_percent_{}'.format(opt.ft_gt_percent) + assert opt.ft_gt_percent <= 1.0 and opt.ft_gt_percent >= 0.0 + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/anet_clip/info.json b/anet_clip/info.json new file mode 100644 index 0000000000000000000000000000000000000000..882983ce3374f2dc0e07ba72cc8c953647b5ce11 --- /dev/null +++ b/anet_clip/info.json @@ -0,0 +1 @@ +{"best": {"opt": {"cfg_path": "cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/anet/captiondata/train_modified.json"], "invalid_video_json": [], "val_caption_file": "data/anet/captiondata/val_1.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/anet/captiondata/val_1.json", "data/anet/captiondata/val_2.json"], "gt_file_for_para_eval": ["data/anet/captiondata/para/anet_entities_val_1_para.json", "data/anet/captiondata/para/anet_entities_val_2_para.json"], "dict_file": "data/howto/vocabulary_howto_rate2_anet.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["CLIP"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 16221, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 
1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 100, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "CLIP", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 30, "window_size": 2, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 10, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, 
"train_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "eval_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_anet.json", "vocab_size_val": 16221, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 200180, "epoch": 19, "best_val_score": 0.4938654333071738, "result_json_path": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/prediction/num4917_epoch19.json", "avg_proposal_num": -1, "Precision": 0.5612365263371945, "Recall": 0.5270524681293403}, "last": {"opt": {"cfg_path": "cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/anet/captiondata/train_modified.json"], "invalid_video_json": [], "val_caption_file": "data/anet/captiondata/val_1.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/anet/captiondata/val_1.json", "data/anet/captiondata/val_2.json"], "gt_file_for_para_eval": ["data/anet/captiondata/para/anet_entities_val_1_para.json", "data/anet/captiondata/para/anet_entities_val_2_para.json"], "dict_file": "data/howto/vocabulary_howto_rate2_anet.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["CLIP"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 16221, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 100, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "CLIP", "text_hidden_dim": 768, 
"max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 30, "window_size": 2, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 10, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "train_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "eval_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_anet.json", "vocab_size_val": 16221, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 200180, "epoch": 19, "best_val_score": 0.4938654333071738}, "history": 
{"val_result_history": {"0": {"eval_score": {"Bleu_1": 0.15656016917085527, "Bleu_2": 0.08210369852679855, "Bleu_3": 0.042491746140277446, "Bleu_4": 0.021149866989626908, "METEOR": 0.08752782819459405, "ROUGE_L": 0.1577032846084498, "CIDEr": 0.2687260839927409, "Recall": 0.4986985069085389, "Precision": 0.548450952477792, "soda_c": 0.045070258467165024, "para_Bleu_1": 0.36987086578065714, "para_Bleu_2": 0.1987998709052068, "para_Bleu_3": 0.11671522868501899, "para_Bleu_4": 0.07164097958462183, "para_METEOR": 0.13901753612789455, "para_ROUGE_L": 0.2826680559963382, "para_CIDEr": 0.0956891322121665, "avg_proposal_number": -1}}, "1": {"eval_score": {"Bleu_1": 0.15965966113561106, "Bleu_2": 0.08785069799970043, "Bleu_3": 0.04739925348589703, "Bleu_4": 0.02377096308421814, "METEOR": 0.09062964515721111, "ROUGE_L": 0.1652647774491388, "CIDEr": 0.27366191469495676, "Recall": 0.45131293652113946, "Precision": 0.5379414954918249, "soda_c": 0.04303682007432423, "para_Bleu_1": 0.3640361416830845, "para_Bleu_2": 0.1986476696673755, "para_Bleu_3": 0.11814800235116821, "para_Bleu_4": 0.07336184523852665, "para_METEOR": 0.13911724177507803, "para_ROUGE_L": 0.28211794880017504, "para_CIDEr": 0.08634617454158834}}, "2": {"eval_score": {"Bleu_1": 0.15440507165989542, "Bleu_2": 0.08178273697953425, "Bleu_3": 0.042600749568780155, "Bleu_4": 0.02119123483046711, "METEOR": 0.08563216148714695, "ROUGE_L": 0.156809182143994, "CIDEr": 0.25960752079137744, "Recall": 0.5075951227720545, "Precision": 0.571834112941489, "soda_c": 0.048597974030683, "para_Bleu_1": 0.3985431504573892, "para_Bleu_2": 0.22415947108296613, "para_Bleu_3": 0.1341003834690626, "para_Bleu_4": 0.08312155143550452, "para_METEOR": 0.1510085678983445, "para_ROUGE_L": 0.2957598062989384, "para_CIDEr": 0.12271570278513648, "avg_proposal_number": -1}}, "3": {"eval_score": {"Bleu_1": 0.16003947012491918, "Bleu_2": 0.08640386650819816, "Bleu_3": 0.045769192920880976, "Bleu_4": 0.023139762266241797, "METEOR": 0.08893476927946467, "ROUGE_L": 0.16285119298911696, "CIDEr": 0.27850058398714506, "Recall": 0.4974410652224822, "Precision": 0.571762083926507, "soda_c": 0.04898353247531122, "para_Bleu_1": 0.4116267700746525, "para_Bleu_2": 0.23315066082372427, "para_Bleu_3": 0.139785630195007, "para_Bleu_4": 0.08689414164874545, "para_METEOR": 0.15321412716959742, "para_ROUGE_L": 0.2993749803089721, "para_CIDEr": 0.12755194391496638, "avg_proposal_number": -1}}, "4": {"eval_score": {"Bleu_1": 0.1612752203314224, "Bleu_2": 0.08712092952271142, "Bleu_3": 0.04643407984417907, "Bleu_4": 0.024237450149938583, "METEOR": 0.0888552980469009, "ROUGE_L": 0.16165678007821221, "CIDEr": 0.28844655875134945, "Recall": 0.5079771255793173, "Precision": 0.5707494407158785, "soda_c": 0.05143467092505771, "para_Bleu_1": 0.425828341023263, "para_Bleu_2": 0.2431293051387748, "para_Bleu_3": 0.14662751878582, "para_Bleu_4": 0.09131956416083617, "para_METEOR": 0.15868276543147294, "para_ROUGE_L": 0.30762031965083425, "para_CIDEr": 0.1438790695271004, "avg_proposal_number": -1}}, "5": {"eval_score": {"Bleu_1": 0.16203040821313286, "Bleu_2": 0.087418866671477, "Bleu_3": 0.04641401855891123, "Bleu_4": 0.023872355329811287, "METEOR": 0.08736154709181514, "ROUGE_L": 0.16095171754962678, "CIDEr": 0.3019460931650574, "Recall": 0.5237442505746305, "Precision": 0.5691986983933232, "soda_c": 0.05366939846142926, "para_Bleu_1": 0.4285515683378188, "para_Bleu_2": 0.24896313523930838, "para_Bleu_3": 0.15083849533584295, "para_Bleu_4": 0.09425440122753082, "para_METEOR": 0.15418242275887206, 
"para_ROUGE_L": 0.3037081433191389, "para_CIDEr": 0.16822639157343386, "avg_proposal_number": -1}}, "6": {"eval_score": {"Bleu_1": 0.17095715677415013, "Bleu_2": 0.0951967897773989, "Bleu_3": 0.05145074727592996, "Bleu_4": 0.026686223548170303, "METEOR": 0.09033289555302068, "ROUGE_L": 0.16939818741017104, "CIDEr": 0.33299543538258497, "Recall": 0.5001550726802355, "Precision": 0.5629321740898863, "soda_c": 0.05378783144134501, "para_Bleu_1": 0.44719474980697405, "para_Bleu_2": 0.2615784516531111, "para_Bleu_3": 0.15956746990786394, "para_Bleu_4": 0.09983770060804388, "para_METEOR": 0.15549284849496958, "para_ROUGE_L": 0.30852597622578265, "para_CIDEr": 0.18758102150887232, "avg_proposal_number": -1}}, "7": {"eval_score": {"Bleu_1": 0.16525493799366836, "Bleu_2": 0.09017429361474327, "Bleu_3": 0.04843073565357156, "Bleu_4": 0.025752141227780294, "METEOR": 0.09042668571725655, "ROUGE_L": 0.1657835735936403, "CIDEr": 0.30766696683798356, "Recall": 0.5070758476264831, "Precision": 0.5698723815334497, "soda_c": 0.05193286444599829, "para_Bleu_1": 0.4299765573510605, "para_Bleu_2": 0.24998607326423264, "para_Bleu_3": 0.15168978606887273, "para_Bleu_4": 0.09540463753102806, "para_METEOR": 0.15913054274631774, "para_ROUGE_L": 0.30821511076520103, "para_CIDEr": 0.14655297481419807}}, "8": {"eval_score": {"Bleu_1": 0.1659435247550983, "Bleu_2": 0.09010888064116455, "Bleu_3": 0.04740925434645997, "Bleu_4": 0.023810200153797586, "METEOR": 0.0893691583245007, "ROUGE_L": 0.16481267120708817, "CIDEr": 0.3096929324572276, "Recall": 0.5271698247293078, "Precision": 0.5766981899532185, "soda_c": 0.05637593299631936, "para_Bleu_1": 0.4507795558374508, "para_Bleu_2": 0.2668765313566654, "para_Bleu_3": 0.16324000259413463, "para_Bleu_4": 0.10292908422008885, "para_METEOR": 0.163503434468027, "para_ROUGE_L": 0.3141109355407807, "para_CIDEr": 0.1830754815850521, "avg_proposal_number": -1}}, "9": {"eval_score": {"Bleu_1": 0.16664911544364056, "Bleu_2": 0.09023295213839283, "Bleu_3": 0.04763940550902772, "Bleu_4": 0.02409205514859969, "METEOR": 0.0878588871148787, "ROUGE_L": 0.16401896184386325, "CIDEr": 0.31947446694949533, "Recall": 0.5282742157284517, "Precision": 0.5750796556165633, "soda_c": 0.05745241491068406, "para_Bleu_1": 0.46204429574393835, "para_Bleu_2": 0.2749900961045832, "para_Bleu_3": 0.1683879565471281, "para_Bleu_4": 0.10624339593597942, "para_METEOR": 0.16245439213508253, "para_ROUGE_L": 0.3162965936511474, "para_CIDEr": 0.20803178964320856, "avg_proposal_number": -1}}, "10": {"eval_score": {"Bleu_1": 0.1671778590456048, "Bleu_2": 0.09077014613023152, "Bleu_3": 0.0476684747303012, "Bleu_4": 0.02445564298599047, "METEOR": 0.08933235383587503, "ROUGE_L": 0.1654660162888944, "CIDEr": 0.31886265111118334, "Recall": 0.5314017615268335, "Precision": 0.5831469052945512, "soda_c": 0.05853263249839839, "para_Bleu_1": 0.46544090189732323, "para_Bleu_2": 0.2789325258737778, "para_Bleu_3": 0.17172911957785325, "para_Bleu_4": 0.10903514181091935, "para_METEOR": 0.16550159188298816, "para_ROUGE_L": 0.3181118223429575, "para_CIDEr": 0.2056618808195008, "avg_proposal_number": -1}}, "11": {"eval_score": {"Bleu_1": 0.16560019346009094, "Bleu_2": 0.08934946581658681, "Bleu_3": 0.04692472826903507, "Bleu_4": 0.023331060597699706, "METEOR": 0.08861943572471001, "ROUGE_L": 0.16392659155605854, "CIDEr": 0.31177527957257306, "Recall": 0.5248955646301546, "Precision": 0.5713061826316813, "soda_c": 0.056694173808073595, "para_Bleu_1": 0.45551540477127933, "para_Bleu_2": 0.2725270289009415, "para_Bleu_3": 
0.16731081427102573, "para_Bleu_4": 0.10555679460767188, "para_METEOR": 0.1665724805603667, "para_ROUGE_L": 0.31619749898051375, "para_CIDEr": 0.19719071969736374}}, "12": {"eval_score": {"Bleu_1": 0.16778675341331784, "Bleu_2": 0.09082555766488616, "Bleu_3": 0.047445681271689716, "Bleu_4": 0.02375280793420285, "METEOR": 0.08883520478698428, "ROUGE_L": 0.16531435721130755, "CIDEr": 0.31778343902267087, "Recall": 0.5273619026669621, "Precision": 0.5698181479221706, "soda_c": 0.05753856798988932, "para_Bleu_1": 0.4610381779339771, "para_Bleu_2": 0.2761144617772928, "para_Bleu_3": 0.16915034097081671, "para_Bleu_4": 0.10654029953240575, "para_METEOR": 0.16638305166981465, "para_ROUGE_L": 0.31710573495570465, "para_CIDEr": 0.19601570682645908}}, "13": {"eval_score": {"Bleu_1": 0.16683698969676453, "Bleu_2": 0.09036855967772307, "Bleu_3": 0.047484441130632896, "Bleu_4": 0.023876859658376735, "METEOR": 0.08814626862844692, "ROUGE_L": 0.16473003568483396, "CIDEr": 0.3189568758512915, "Recall": 0.5281546209817979, "Precision": 0.5704333604501349, "soda_c": 0.057417105431783064, "para_Bleu_1": 0.4580706340663244, "para_Bleu_2": 0.27372623489326064, "para_Bleu_3": 0.16745128920972313, "para_Bleu_4": 0.10550306643408856, "para_METEOR": 0.16656454278617736, "para_ROUGE_L": 0.31631873012989425, "para_CIDEr": 0.19724321819057877}}, "14": {"eval_score": {"Bleu_1": 0.16662144072598145, "Bleu_2": 0.08988753231411394, "Bleu_3": 0.04690847145308288, "Bleu_4": 0.023224274927987735, "METEOR": 0.08725158341768323, "ROUGE_L": 0.16364893754496343, "CIDEr": 0.32028824475030926, "Recall": 0.5260420675803493, "Precision": 0.5630584367161506, "soda_c": 0.057565785652999135, "para_Bleu_1": 0.46764194087144684, "para_Bleu_2": 0.2801629240374498, "para_Bleu_3": 0.1713033186995987, "para_Bleu_4": 0.10750827268624512, "para_METEOR": 0.16742715934059368, "para_ROUGE_L": 0.31858424377772926, "para_CIDEr": 0.2089956210595351, "avg_proposal_number": -1}}, "15": {"eval_score": {"Bleu_1": 0.16754398447821903, "Bleu_2": 0.08978801866243748, "Bleu_3": 0.046077601805781236, "Bleu_4": 0.02215727819941335, "METEOR": 0.08650894641812401, "ROUGE_L": 0.16425299709373153, "CIDEr": 0.3192637628790779, "Recall": 0.5308598805776927, "Precision": 0.5705477594739302, "soda_c": 0.059035206979637336, "para_Bleu_1": 0.4722129873397206, "para_Bleu_2": 0.2843271953295457, "para_Bleu_3": 0.17433620623201318, "para_Bleu_4": 0.10943737200004257, "para_METEOR": 0.16524483023272712, "para_ROUGE_L": 0.3180351825656492, "para_CIDEr": 0.2139382514781602, "avg_proposal_number": -1}}, "16": {"eval_score": {"Bleu_1": 0.16584280243722227, "Bleu_2": 0.08889969905794425, "Bleu_3": 0.04569298286173284, "Bleu_4": 0.021992960199339176, "METEOR": 0.08570833880397384, "ROUGE_L": 0.16234979503724006, "CIDEr": 0.3170462149966731, "Recall": 0.5273397281824633, "Precision": 0.5648989898989865, "soda_c": 0.058539462474976364, "para_Bleu_1": 0.4735378044184376, "para_Bleu_2": 0.2855599966961999, "para_Bleu_3": 0.17485842077678387, "para_Bleu_4": 0.10998333079246524, "para_METEOR": 0.16580782598840993, "para_ROUGE_L": 0.3184105968751349, "para_CIDEr": 0.2144083270960459, "avg_proposal_number": -1}}, "17": {"eval_score": {"Bleu_1": 0.16720622564646215, "Bleu_2": 0.08946643461131876, "Bleu_3": 0.04568137095423273, "Bleu_4": 0.022039722503534608, "METEOR": 0.08588931176535387, "ROUGE_L": 0.16315869782389542, "CIDEr": 0.32099741016990446, "Recall": 0.5265047853249455, "Precision": 0.5647345942647923, "soda_c": 0.05847424883094643, "para_Bleu_1": 0.47508155945278135, 
"para_Bleu_2": 0.2858233856765029, "para_Bleu_3": 0.17499503512152859, "para_Bleu_4": 0.11002968407978216, "para_METEOR": 0.16541373751181562, "para_ROUGE_L": 0.3190110890037882, "para_CIDEr": 0.21421557986951392}}, "18": {"eval_score": {"Bleu_1": 0.1662475028889873, "Bleu_2": 0.08895418147726737, "Bleu_3": 0.04559170272578064, "Bleu_4": 0.021869443641790748, "METEOR": 0.0853620749347768, "ROUGE_L": 0.16226693807975517, "CIDEr": 0.3203697867996399, "Recall": 0.5243080966273422, "Precision": 0.5592002237136435, "soda_c": 0.058066485957305666, "para_Bleu_1": 0.47302383939773723, "para_Bleu_2": 0.2848420020452884, "para_Bleu_3": 0.17477626094199183, "para_Bleu_4": 0.11005159892431456, "para_METEOR": 0.16474042555391544, "para_ROUGE_L": 0.31754161420686944, "para_CIDEr": 0.2082818020277855}}, "19": {"eval_score": {"Bleu_1": 0.16600244771432068, "Bleu_2": 0.08859363359362551, "Bleu_3": 0.045174799285766926, "Bleu_4": 0.021453706973694267, "METEOR": 0.08469975853590762, "ROUGE_L": 0.1615333099598977, "CIDEr": 0.3178372173219055, "Recall": 0.5270524681293403, "Precision": 0.5612365263371945, "soda_c": 0.05852570981425518, "para_Bleu_1": 0.47641872729084495, "para_Bleu_2": 0.28679556025023933, "para_Bleu_3": 0.1757988669447671, "para_Bleu_4": 0.11061748158923715, "para_METEOR": 0.1647238014039032, "para_ROUGE_L": 0.3182336912910021, "para_CIDEr": 0.21852415031403352, "avg_proposal_number": -1}}}, "loss_history": {"1000": {"loss_ce": 0.284, "loss_counter": 0.126, "loss_bbox": 0.117, "loss_giou": 0.275, "loss_self_iou": 0.126, "cardinality_error": 3.775, "loss_ce_0": 0.284, "loss_counter_0": 0.126, "loss_bbox_0": 0.118, "loss_giou_0": 0.276, "loss_self_iou_0": 0.126, "cardinality_error_0": 3.775, "loss_caption_0": 3.781, "loss_caption": 3.778, "total_loss": 18.585}, "2000": {"loss_ce": 0.287, "loss_counter": 0.119, "loss_bbox": 0.087, "loss_giou": 0.239, "loss_self_iou": 0.12, "cardinality_error": 3.705, "loss_ce_0": 0.289, "loss_counter_0": 0.118, "loss_bbox_0": 0.087, "loss_giou_0": 0.239, "loss_self_iou_0": 0.121, "cardinality_error_0": 3.705, "loss_caption_0": 3.682, "loss_caption": 3.675, "total_loss": 17.896}, "3000": {"loss_ce": 0.291, "loss_counter": 0.122, "loss_bbox": 0.078, "loss_giou": 0.227, "loss_self_iou": 0.098, "cardinality_error": 3.705, "loss_ce_0": 0.292, "loss_counter_0": 0.122, "loss_bbox_0": 0.078, "loss_giou_0": 0.228, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.705, "loss_caption_0": 3.668, "loss_caption": 3.664, "total_loss": 17.771}, "4000": {"loss_ce": 0.289, "loss_counter": 0.126, "loss_bbox": 0.078, "loss_giou": 0.224, "loss_self_iou": 0.1, "cardinality_error": 3.784, "loss_ce_0": 0.291, "loss_counter_0": 0.127, "loss_bbox_0": 0.078, "loss_giou_0": 0.223, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.784, "loss_caption_0": 3.624, "loss_caption": 3.629, "total_loss": 17.579}, "5000": {"loss_ce": 0.285, "loss_counter": 0.121, "loss_bbox": 0.08, "loss_giou": 0.218, "loss_self_iou": 0.114, "cardinality_error": 3.674, "loss_ce_0": 0.287, "loss_counter_0": 0.121, "loss_bbox_0": 0.08, "loss_giou_0": 0.218, "loss_self_iou_0": 0.115, "cardinality_error_0": 3.674, "loss_caption_0": 3.629, "loss_caption": 3.629, "total_loss": 17.526}, "6000": {"loss_ce": 0.292, "loss_counter": 0.13, "loss_bbox": 0.076, "loss_giou": 0.22, "loss_self_iou": 0.098, "cardinality_error": 3.786, "loss_ce_0": 0.293, "loss_counter_0": 0.129, "loss_bbox_0": 0.076, "loss_giou_0": 0.22, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.786, "loss_caption_0": 3.625, "loss_caption": 3.622, 
"total_loss": 17.555}, "7000": {"loss_ce": 0.292, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.215, "loss_self_iou": 0.097, "cardinality_error": 3.746, "loss_ce_0": 0.293, "loss_counter_0": 0.119, "loss_bbox_0": 0.076, "loss_giou_0": 0.215, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.746, "loss_caption_0": 3.58, "loss_caption": 3.576, "total_loss": 17.319}, "8000": {"loss_ce": 0.288, "loss_counter": 0.129, "loss_bbox": 0.078, "loss_giou": 0.218, "loss_self_iou": 0.108, "cardinality_error": 3.754, "loss_ce_0": 0.288, "loss_counter_0": 0.128, "loss_bbox_0": 0.079, "loss_giou_0": 0.218, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.754, "loss_caption_0": 3.546, "loss_caption": 3.546, "total_loss": 17.209}, "9000": {"loss_ce": 0.29, "loss_counter": 0.12, "loss_bbox": 0.078, "loss_giou": 0.219, "loss_self_iou": 0.1, "cardinality_error": 3.685, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.078, "loss_giou_0": 0.219, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.685, "loss_caption_0": 3.544, "loss_caption": 3.54, "total_loss": 17.2}, "10000": {"loss_ce": 0.293, "loss_counter": 0.125, "loss_bbox": 0.077, "loss_giou": 0.22, "loss_self_iou": 0.101, "cardinality_error": 3.748, "loss_ce_0": 0.293, "loss_counter_0": 0.125, "loss_bbox_0": 0.078, "loss_giou_0": 0.22, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.748, "loss_caption_0": 3.582, "loss_caption": 3.577, "total_loss": 17.376}, "11000": {"loss_ce": 0.29, "loss_counter": 0.124, "loss_bbox": 0.077, "loss_giou": 0.217, "loss_self_iou": 0.101, "cardinality_error": 3.788, "loss_ce_0": 0.292, "loss_counter_0": 0.123, "loss_bbox_0": 0.076, "loss_giou_0": 0.217, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.788, "loss_caption_0": 3.446, "loss_caption": 3.443, "total_loss": 16.802}, "12000": {"loss_ce": 0.29, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.214, "loss_self_iou": 0.103, "cardinality_error": 3.694, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.075, "loss_giou_0": 0.213, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.694, "loss_caption_0": 3.427, "loss_caption": 3.428, "total_loss": 16.701}, "13000": {"loss_ce": 0.291, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.217, "loss_self_iou": 0.107, "cardinality_error": 3.689, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.076, "loss_giou_0": 0.217, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.689, "loss_caption_0": 3.464, "loss_caption": 3.461, "total_loss": 16.871}, "14000": {"loss_ce": 0.292, "loss_counter": 0.118, "loss_bbox": 0.073, "loss_giou": 0.21, "loss_self_iou": 0.1, "cardinality_error": 3.663, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.211, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.663, "loss_caption_0": 3.414, "loss_caption": 3.41, "total_loss": 16.616}, "15000": {"loss_ce": 0.295, "loss_counter": 0.127, "loss_bbox": 0.076, "loss_giou": 0.214, "loss_self_iou": 0.103, "cardinality_error": 3.828, "loss_ce_0": 0.296, "loss_counter_0": 0.127, "loss_bbox_0": 0.076, "loss_giou_0": 0.215, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.828, "loss_caption_0": 3.453, "loss_caption": 3.453, "total_loss": 16.836}, "16000": {"loss_ce": 0.296, "loss_counter": 0.121, "loss_bbox": 0.073, "loss_giou": 0.206, "loss_self_iou": 0.105, "cardinality_error": 3.687, "loss_ce_0": 0.297, "loss_counter_0": 0.12, "loss_bbox_0": 0.072, "loss_giou_0": 0.207, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.687, "loss_caption_0": 3.461, 
"loss_caption": 3.462, "total_loss": 16.803}, "17000": {"loss_ce": 0.3, "loss_counter": 0.127, "loss_bbox": 0.073, "loss_giou": 0.208, "loss_self_iou": 0.102, "cardinality_error": 3.791, "loss_ce_0": 0.3, "loss_counter_0": 0.127, "loss_bbox_0": 0.073, "loss_giou_0": 0.209, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.791, "loss_caption_0": 3.469, "loss_caption": 3.465, "total_loss": 16.864}, "18000": {"loss_ce": 0.298, "loss_counter": 0.119, "loss_bbox": 0.074, "loss_giou": 0.205, "loss_self_iou": 0.107, "cardinality_error": 3.68, "loss_ce_0": 0.298, "loss_counter_0": 0.119, "loss_bbox_0": 0.074, "loss_giou_0": 0.206, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.68, "loss_caption_0": 3.478, "loss_caption": 3.475, "total_loss": 16.859}, "19000": {"loss_ce": 0.305, "loss_counter": 0.126, "loss_bbox": 0.073, "loss_giou": 0.207, "loss_self_iou": 0.099, "cardinality_error": 3.752, "loss_ce_0": 0.304, "loss_counter_0": 0.126, "loss_bbox_0": 0.072, "loss_giou_0": 0.208, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.752, "loss_caption_0": 3.396, "loss_caption": 3.396, "total_loss": 16.585}, "20000": {"loss_ce": 0.303, "loss_counter": 0.128, "loss_bbox": 0.071, "loss_giou": 0.208, "loss_self_iou": 0.101, "cardinality_error": 3.804, "loss_ce_0": 0.304, "loss_counter_0": 0.128, "loss_bbox_0": 0.071, "loss_giou_0": 0.208, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.804, "loss_caption_0": 3.42, "loss_caption": 3.419, "total_loss": 16.684}, "21000": {"loss_ce": 0.298, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.202, "loss_self_iou": 0.101, "cardinality_error": 3.666, "loss_ce_0": 0.299, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.202, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.666, "loss_caption_0": 3.344, "loss_caption": 3.335, "total_loss": 16.294}, "22000": {"loss_ce": 0.293, "loss_counter": 0.119, "loss_bbox": 0.073, "loss_giou": 0.201, "loss_self_iou": 0.109, "cardinality_error": 3.752, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.203, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.752, "loss_caption_0": 3.302, "loss_caption": 3.304, "total_loss": 16.116}, "23000": {"loss_ce": 0.299, "loss_counter": 0.128, "loss_bbox": 0.077, "loss_giou": 0.208, "loss_self_iou": 0.113, "cardinality_error": 3.803, "loss_ce_0": 0.299, "loss_counter_0": 0.128, "loss_bbox_0": 0.076, "loss_giou_0": 0.208, "loss_self_iou_0": 0.112, "cardinality_error_0": 3.803, "loss_caption_0": 3.348, "loss_caption": 3.34, "total_loss": 16.363}, "24000": {"loss_ce": 0.293, "loss_counter": 0.122, "loss_bbox": 0.076, "loss_giou": 0.207, "loss_self_iou": 0.093, "cardinality_error": 3.729, "loss_ce_0": 0.294, "loss_counter_0": 0.122, "loss_bbox_0": 0.076, "loss_giou_0": 0.207, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.729, "loss_caption_0": 3.354, "loss_caption": 3.351, "total_loss": 16.364}, "25000": {"loss_ce": 0.294, "loss_counter": 0.122, "loss_bbox": 0.078, "loss_giou": 0.213, "loss_self_iou": 0.091, "cardinality_error": 3.734, "loss_ce_0": 0.295, "loss_counter_0": 0.122, "loss_bbox_0": 0.077, "loss_giou_0": 0.214, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.734, "loss_caption_0": 3.372, "loss_caption": 3.372, "total_loss": 16.494}, "26000": {"loss_ce": 0.298, "loss_counter": 0.125, "loss_bbox": 0.072, "loss_giou": 0.203, "loss_self_iou": 0.096, "cardinality_error": 3.784, "loss_ce_0": 0.299, "loss_counter_0": 0.125, "loss_bbox_0": 0.073, "loss_giou_0": 0.204, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.784, 
"loss_caption_0": 3.334, "loss_caption": 3.333, "total_loss": 16.279}, "27000": {"loss_ce": 0.289, "loss_counter": 0.118, "loss_bbox": 0.076, "loss_giou": 0.203, "loss_self_iou": 0.102, "cardinality_error": 3.64, "loss_ce_0": 0.291, "loss_counter_0": 0.119, "loss_bbox_0": 0.076, "loss_giou_0": 0.203, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.64, "loss_caption_0": 3.348, "loss_caption": 3.345, "total_loss": 16.287}, "28000": {"loss_ce": 0.292, "loss_counter": 0.125, "loss_bbox": 0.077, "loss_giou": 0.201, "loss_self_iou": 0.095, "cardinality_error": 3.774, "loss_ce_0": 0.293, "loss_counter_0": 0.125, "loss_bbox_0": 0.076, "loss_giou_0": 0.202, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.774, "loss_caption_0": 3.337, "loss_caption": 3.333, "total_loss": 16.249}, "29000": {"loss_ce": 0.298, "loss_counter": 0.12, "loss_bbox": 0.075, "loss_giou": 0.204, "loss_self_iou": 0.1, "cardinality_error": 3.755, "loss_ce_0": 0.299, "loss_counter_0": 0.12, "loss_bbox_0": 0.074, "loss_giou_0": 0.205, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.755, "loss_caption_0": 3.315, "loss_caption": 3.321, "total_loss": 16.223}, "30000": {"loss_ce": 0.302, "loss_counter": 0.119, "loss_bbox": 0.071, "loss_giou": 0.195, "loss_self_iou": 0.103, "cardinality_error": 3.72, "loss_ce_0": 0.302, "loss_counter_0": 0.119, "loss_bbox_0": 0.072, "loss_giou_0": 0.196, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.72, "loss_caption_0": 3.347, "loss_caption": 3.349, "total_loss": 16.283}, "31000": {"loss_ce": 0.296, "loss_counter": 0.123, "loss_bbox": 0.073, "loss_giou": 0.202, "loss_self_iou": 0.114, "cardinality_error": 3.772, "loss_ce_0": 0.296, "loss_counter_0": 0.123, "loss_bbox_0": 0.074, "loss_giou_0": 0.203, "loss_self_iou_0": 0.115, "cardinality_error_0": 3.772, "loss_caption_0": 3.24, "loss_caption": 3.242, "total_loss": 15.889}, "32000": {"loss_ce": 0.3, "loss_counter": 0.117, "loss_bbox": 0.069, "loss_giou": 0.193, "loss_self_iou": 0.093, "cardinality_error": 3.66, "loss_ce_0": 0.3, "loss_counter_0": 0.117, "loss_bbox_0": 0.07, "loss_giou_0": 0.195, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.66, "loss_caption_0": 3.251, "loss_caption": 3.248, "total_loss": 15.869}, "33000": {"loss_ce": 0.302, "loss_counter": 0.126, "loss_bbox": 0.07, "loss_giou": 0.197, "loss_self_iou": 0.102, "cardinality_error": 3.787, "loss_ce_0": 0.301, "loss_counter_0": 0.126, "loss_bbox_0": 0.071, "loss_giou_0": 0.199, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.787, "loss_caption_0": 3.223, "loss_caption": 3.225, "total_loss": 15.81}, "34000": {"loss_ce": 0.297, "loss_counter": 0.121, "loss_bbox": 0.076, "loss_giou": 0.201, "loss_self_iou": 0.107, "cardinality_error": 3.719, "loss_ce_0": 0.296, "loss_counter_0": 0.121, "loss_bbox_0": 0.077, "loss_giou_0": 0.202, "loss_self_iou_0": 0.108, "cardinality_error_0": 3.719, "loss_caption_0": 3.21, "loss_caption": 3.206, "total_loss": 15.752}, "35000": {"loss_ce": 0.303, "loss_counter": 0.122, "loss_bbox": 0.074, "loss_giou": 0.201, "loss_self_iou": 0.1, "cardinality_error": 3.761, "loss_ce_0": 0.304, "loss_counter_0": 0.121, "loss_bbox_0": 0.073, "loss_giou_0": 0.202, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.761, "loss_caption_0": 3.261, "loss_caption": 3.267, "total_loss": 16.006}, "36000": {"loss_ce": 0.302, "loss_counter": 0.12, "loss_bbox": 0.074, "loss_giou": 0.202, "loss_self_iou": 0.096, "cardinality_error": 3.731, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.075, "loss_giou_0": 0.203, "loss_self_iou_0": 0.096, 
"cardinality_error_0": 3.731, "loss_caption_0": 3.322, "loss_caption": 3.322, "total_loss": 16.237}, "37000": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.193, "loss_self_iou": 0.088, "cardinality_error": 3.747, "loss_ce_0": 0.306, "loss_counter_0": 0.12, "loss_bbox_0": 0.069, "loss_giou_0": 0.195, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.747, "loss_caption_0": 3.276, "loss_caption": 3.278, "total_loss": 16.005}, "38000": {"loss_ce": 0.295, "loss_counter": 0.122, "loss_bbox": 0.073, "loss_giou": 0.198, "loss_self_iou": 0.096, "cardinality_error": 3.747, "loss_ce_0": 0.295, "loss_counter_0": 0.122, "loss_bbox_0": 0.074, "loss_giou_0": 0.199, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.747, "loss_caption_0": 3.26, "loss_caption": 3.267, "total_loss": 15.944}, "39000": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.073, "loss_giou": 0.194, "loss_self_iou": 0.096, "cardinality_error": 3.714, "loss_ce_0": 0.3, "loss_counter_0": 0.12, "loss_bbox_0": 0.074, "loss_giou_0": 0.196, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.714, "loss_caption_0": 3.29, "loss_caption": 3.284, "total_loss": 16.029}, "40000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.068, "loss_giou": 0.187, "loss_self_iou": 0.098, "cardinality_error": 3.742, "loss_ce_0": 0.302, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.189, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.742, "loss_caption_0": 3.255, "loss_caption": 3.258, "total_loss": 15.861}, "41000": {"loss_ce": 0.304, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.196, "loss_self_iou": 0.094, "cardinality_error": 3.73, "loss_ce_0": 0.303, "loss_counter_0": 0.121, "loss_bbox_0": 0.071, "loss_giou_0": 0.197, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.73, "loss_caption_0": 3.159, "loss_caption": 3.162, "total_loss": 15.549}, "42000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.072, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.698, "loss_ce_0": 0.298, "loss_counter_0": 0.116, "loss_bbox_0": 0.071, "loss_giou_0": 0.189, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.698, "loss_caption_0": 3.191, "loss_caption": 3.187, "total_loss": 15.571}, "43000": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.198, "loss_self_iou": 0.089, "cardinality_error": 3.785, "loss_ce_0": 0.306, "loss_counter_0": 0.119, "loss_bbox_0": 0.069, "loss_giou_0": 0.2, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.785, "loss_caption_0": 3.247, "loss_caption": 3.249, "total_loss": 15.93}, "44000": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.072, "loss_giou": 0.194, "loss_self_iou": 0.104, "cardinality_error": 3.727, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.072, "loss_giou_0": 0.195, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.727, "loss_caption_0": 3.228, "loss_caption": 3.227, "total_loss": 15.794}, "45000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.194, "loss_self_iou": 0.094, "cardinality_error": 3.684, "loss_ce_0": 0.304, "loss_counter_0": 0.12, "loss_bbox_0": 0.07, "loss_giou_0": 0.196, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.684, "loss_caption_0": 3.138, "loss_caption": 3.143, "total_loss": 15.458}, "46000": {"loss_ce": 0.302, "loss_counter": 0.123, "loss_bbox": 0.071, "loss_giou": 0.194, "loss_self_iou": 0.107, "cardinality_error": 3.8, "loss_ce_0": 0.301, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.196, 
"loss_self_iou_0": 0.107, "cardinality_error_0": 3.8, "loss_caption_0": 3.198, "loss_caption": 3.202, "total_loss": 15.69}, "47000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.071, "loss_giou": 0.193, "loss_self_iou": 0.1, "cardinality_error": 3.724, "loss_ce_0": 0.302, "loss_counter_0": 0.123, "loss_bbox_0": 0.072, "loss_giou_0": 0.194, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.724, "loss_caption_0": 3.166, "loss_caption": 3.167, "total_loss": 15.544}, "48000": {"loss_ce": 0.302, "loss_counter": 0.126, "loss_bbox": 0.074, "loss_giou": 0.194, "loss_self_iou": 0.1, "cardinality_error": 3.779, "loss_ce_0": 0.303, "loss_counter_0": 0.126, "loss_bbox_0": 0.073, "loss_giou_0": 0.195, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.779, "loss_caption_0": 3.197, "loss_caption": 3.204, "total_loss": 15.693}, "49000": {"loss_ce": 0.3, "loss_counter": 0.117, "loss_bbox": 0.072, "loss_giou": 0.186, "loss_self_iou": 0.103, "cardinality_error": 3.67, "loss_ce_0": 0.299, "loss_counter_0": 0.117, "loss_bbox_0": 0.073, "loss_giou_0": 0.189, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.67, "loss_caption_0": 3.197, "loss_caption": 3.193, "total_loss": 15.597}, "50000": {"loss_ce": 0.303, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.191, "loss_self_iou": 0.1, "cardinality_error": 3.769, "loss_ce_0": 0.303, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.192, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.769, "loss_caption_0": 3.195, "loss_caption": 3.196, "total_loss": 15.646}, "51000": {"loss_ce": 0.304, "loss_counter": 0.119, "loss_bbox": 0.072, "loss_giou": 0.19, "loss_self_iou": 0.1, "cardinality_error": 3.708, "loss_ce_0": 0.304, "loss_counter_0": 0.119, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.708, "loss_caption_0": 3.123, "loss_caption": 3.122, "total_loss": 15.345}, "52000": {"loss_ce": 0.302, "loss_counter": 0.122, "loss_bbox": 0.07, "loss_giou": 0.195, "loss_self_iou": 0.091, "cardinality_error": 3.787, "loss_ce_0": 0.302, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.198, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.787, "loss_caption_0": 3.08, "loss_caption": 3.08, "total_loss": 15.224}, "53000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.192, "loss_self_iou": 0.101, "cardinality_error": 3.688, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.071, "loss_giou_0": 0.194, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.688, "loss_caption_0": 3.121, "loss_caption": 3.125, "total_loss": 15.366}, "54000": {"loss_ce": 0.304, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.184, "loss_self_iou": 0.096, "cardinality_error": 3.66, "loss_ce_0": 0.303, "loss_counter_0": 0.12, "loss_bbox_0": 0.07, "loss_giou_0": 0.187, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.66, "loss_caption_0": 3.151, "loss_caption": 3.158, "total_loss": 15.44}, "55000": {"loss_ce": 0.314, "loss_counter": 0.123, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.759, "loss_ce_0": 0.314, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.188, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.759, "loss_caption_0": 3.137, "loss_caption": 3.138, "total_loss": 15.427}, "56000": {"loss_ce": 0.304, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.7, "loss_ce_0": 0.303, "loss_counter_0": 0.119, "loss_bbox_0": 0.07, "loss_giou_0": 0.189, 
"loss_self_iou_0": 0.102, "cardinality_error_0": 3.7, "loss_caption_0": 3.128, "loss_caption": 3.132, "total_loss": 15.353}, "57000": {"loss_ce": 0.308, "loss_counter": 0.125, "loss_bbox": 0.069, "loss_giou": 0.192, "loss_self_iou": 0.094, "cardinality_error": 3.833, "loss_ce_0": 0.308, "loss_counter_0": 0.125, "loss_bbox_0": 0.069, "loss_giou_0": 0.194, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.833, "loss_caption_0": 3.157, "loss_caption": 3.154, "total_loss": 15.516}, "58000": {"loss_ce": 0.3, "loss_counter": 0.116, "loss_bbox": 0.072, "loss_giou": 0.192, "loss_self_iou": 0.099, "cardinality_error": 3.724, "loss_ce_0": 0.3, "loss_counter_0": 0.116, "loss_bbox_0": 0.073, "loss_giou_0": 0.192, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.724, "loss_caption_0": 3.092, "loss_caption": 3.088, "total_loss": 15.209}, "59000": {"loss_ce": 0.305, "loss_counter": 0.126, "loss_bbox": 0.07, "loss_giou": 0.187, "loss_self_iou": 0.092, "cardinality_error": 3.806, "loss_ce_0": 0.304, "loss_counter_0": 0.126, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.806, "loss_caption_0": 3.204, "loss_caption": 3.204, "total_loss": 15.668}, "60000": {"loss_ce": 0.298, "loss_counter": 0.119, "loss_bbox": 0.073, "loss_giou": 0.197, "loss_self_iou": 0.102, "cardinality_error": 3.73, "loss_ce_0": 0.298, "loss_counter_0": 0.118, "loss_bbox_0": 0.074, "loss_giou_0": 0.198, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.73, "loss_caption_0": 3.185, "loss_caption": 3.179, "total_loss": 15.62}, "61000": {"loss_ce": 0.302, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.183, "loss_self_iou": 0.099, "cardinality_error": 3.687, "loss_ce_0": 0.303, "loss_counter_0": 0.117, "loss_bbox_0": 0.067, "loss_giou_0": 0.185, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.687, "loss_caption_0": 3.025, "loss_caption": 3.031, "total_loss": 14.914}, "62000": {"loss_ce": 0.305, "loss_counter": 0.125, "loss_bbox": 0.068, "loss_giou": 0.192, "loss_self_iou": 0.088, "cardinality_error": 3.809, "loss_ce_0": 0.304, "loss_counter_0": 0.125, "loss_bbox_0": 0.069, "loss_giou_0": 0.194, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.809, "loss_caption_0": 3.067, "loss_caption": 3.064, "total_loss": 15.147}, "63000": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.072, "loss_giou": 0.189, "loss_self_iou": 0.102, "cardinality_error": 3.636, "loss_ce_0": 0.301, "loss_counter_0": 0.113, "loss_bbox_0": 0.073, "loss_giou_0": 0.193, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.636, "loss_caption_0": 3.09, "loss_caption": 3.083, "total_loss": 15.188}, "64000": {"loss_ce": 0.308, "loss_counter": 0.12, "loss_bbox": 0.067, "loss_giou": 0.185, "loss_self_iou": 0.105, "cardinality_error": 3.738, "loss_ce_0": 0.309, "loss_counter_0": 0.12, "loss_bbox_0": 0.067, "loss_giou_0": 0.186, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.738, "loss_caption_0": 3.09, "loss_caption": 3.088, "total_loss": 15.193}, "65000": {"loss_ce": 0.302, "loss_counter": 0.123, "loss_bbox": 0.069, "loss_giou": 0.191, "loss_self_iou": 0.094, "cardinality_error": 3.735, "loss_ce_0": 0.304, "loss_counter_0": 0.123, "loss_bbox_0": 0.069, "loss_giou_0": 0.191, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.735, "loss_caption_0": 3.087, "loss_caption": 3.083, "total_loss": 15.203}, "66000": {"loss_ce": 0.307, "loss_counter": 0.121, "loss_bbox": 0.069, "loss_giou": 0.188, "loss_self_iou": 0.095, "cardinality_error": 3.753, "loss_ce_0": 0.307, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, 
"loss_giou_0": 0.19, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.753, "loss_caption_0": 3.093, "loss_caption": 3.093, "total_loss": 15.235}, "67000": {"loss_ce": 0.299, "loss_counter": 0.123, "loss_bbox": 0.071, "loss_giou": 0.189, "loss_self_iou": 0.099, "cardinality_error": 3.781, "loss_ce_0": 0.299, "loss_counter_0": 0.123, "loss_bbox_0": 0.072, "loss_giou_0": 0.192, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.781, "loss_caption_0": 3.104, "loss_caption": 3.095, "total_loss": 15.24}, "68000": {"loss_ce": 0.3, "loss_counter": 0.118, "loss_bbox": 0.073, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.702, "loss_ce_0": 0.3, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.187, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.702, "loss_caption_0": 3.092, "loss_caption": 3.087, "total_loss": 15.171}, "69000": {"loss_ce": 0.304, "loss_counter": 0.116, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.087, "cardinality_error": 3.705, "loss_ce_0": 0.303, "loss_counter_0": 0.116, "loss_bbox_0": 0.069, "loss_giou_0": 0.187, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.705, "loss_caption_0": 3.087, "loss_caption": 3.084, "total_loss": 15.154}, "70000": {"loss_ce": 0.308, "loss_counter": 0.119, "loss_bbox": 0.07, "loss_giou": 0.188, "loss_self_iou": 0.104, "cardinality_error": 3.763, "loss_ce_0": 0.309, "loss_counter_0": 0.12, "loss_bbox_0": 0.069, "loss_giou_0": 0.19, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.763, "loss_caption_0": 3.137, "loss_caption": 3.142, "total_loss": 15.421}, "71000": {"loss_ce": 0.304, "loss_counter": 0.115, "loss_bbox": 0.067, "loss_giou": 0.187, "loss_self_iou": 0.091, "cardinality_error": 3.724, "loss_ce_0": 0.304, "loss_counter_0": 0.115, "loss_bbox_0": 0.068, "loss_giou_0": 0.189, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.724, "loss_caption_0": 2.994, "loss_caption": 2.994, "total_loss": 14.812}, "72000": {"loss_ce": 0.297, "loss_counter": 0.118, "loss_bbox": 0.07, "loss_giou": 0.187, "loss_self_iou": 0.099, "cardinality_error": 3.665, "loss_ce_0": 0.296, "loss_counter_0": 0.118, "loss_bbox_0": 0.072, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.665, "loss_caption_0": 2.995, "loss_caption": 3.0, "total_loss": 14.803}, "73000": {"loss_ce": 0.301, "loss_counter": 0.122, "loss_bbox": 0.067, "loss_giou": 0.183, "loss_self_iou": 0.099, "cardinality_error": 3.762, "loss_ce_0": 0.302, "loss_counter_0": 0.122, "loss_bbox_0": 0.067, "loss_giou_0": 0.184, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.762, "loss_caption_0": 3.03, "loss_caption": 3.034, "total_loss": 14.924}, "74000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.067, "loss_giou": 0.181, "loss_self_iou": 0.093, "cardinality_error": 3.722, "loss_ce_0": 0.304, "loss_counter_0": 0.12, "loss_bbox_0": 0.068, "loss_giou_0": 0.183, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.722, "loss_caption_0": 3.061, "loss_caption": 3.062, "total_loss": 15.037}, "75000": {"loss_ce": 0.3, "loss_counter": 0.124, "loss_bbox": 0.069, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.835, "loss_ce_0": 0.302, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.19, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.835, "loss_caption_0": 3.102, "loss_caption": 3.108, "total_loss": 15.261}, "76000": {"loss_ce": 0.304, "loss_counter": 0.118, "loss_bbox": 0.069, "loss_giou": 0.19, "loss_self_iou": 0.096, "cardinality_error": 3.787, "loss_ce_0": 0.305, "loss_counter_0": 0.118, 
"loss_bbox_0": 0.069, "loss_giou_0": 0.192, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.787, "loss_caption_0": 3.055, "loss_caption": 3.056, "total_loss": 15.081}, "77000": {"loss_ce": 0.3, "loss_counter": 0.122, "loss_bbox": 0.07, "loss_giou": 0.191, "loss_self_iou": 0.101, "cardinality_error": 3.753, "loss_ce_0": 0.3, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.192, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.753, "loss_caption_0": 3.064, "loss_caption": 3.063, "total_loss": 15.105}, "78000": {"loss_ce": 0.303, "loss_counter": 0.118, "loss_bbox": 0.069, "loss_giou": 0.192, "loss_self_iou": 0.094, "cardinality_error": 3.812, "loss_ce_0": 0.302, "loss_counter_0": 0.118, "loss_bbox_0": 0.071, "loss_giou_0": 0.194, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.812, "loss_caption_0": 3.075, "loss_caption": 3.081, "total_loss": 15.186}, "79000": {"loss_ce": 0.303, "loss_counter": 0.119, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.099, "cardinality_error": 3.712, "loss_ce_0": 0.304, "loss_counter_0": 0.119, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.712, "loss_caption_0": 3.004, "loss_caption": 3.004, "total_loss": 14.833}, "80000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.099, "cardinality_error": 3.639, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.069, "loss_giou_0": 0.185, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.639, "loss_caption_0": 3.011, "loss_caption": 3.021, "total_loss": 14.846}, "81000": {"loss_ce": 0.3, "loss_counter": 0.116, "loss_bbox": 0.064, "loss_giou": 0.177, "loss_self_iou": 0.098, "cardinality_error": 3.664, "loss_ce_0": 0.3, "loss_counter_0": 0.116, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.664, "loss_caption_0": 2.972, "loss_caption": 2.974, "total_loss": 14.63}, "82000": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.067, "loss_giou": 0.179, "loss_self_iou": 0.098, "cardinality_error": 3.692, "loss_ce_0": 0.301, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.692, "loss_caption_0": 2.914, "loss_caption": 2.912, "total_loss": 14.413}, "83000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.067, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.764, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.068, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.764, "loss_caption_0": 2.939, "loss_caption": 2.933, "total_loss": 14.562}, "84000": {"loss_ce": 0.299, "loss_counter": 0.119, "loss_bbox": 0.066, "loss_giou": 0.18, "loss_self_iou": 0.086, "cardinality_error": 3.724, "loss_ce_0": 0.3, "loss_counter_0": 0.119, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.086, "cardinality_error_0": 3.724, "loss_caption_0": 2.964, "loss_caption": 2.963, "total_loss": 14.614}, "85000": {"loss_ce": 0.301, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.187, "loss_self_iou": 0.094, "cardinality_error": 3.73, "loss_ce_0": 0.301, "loss_counter_0": 0.114, "loss_bbox_0": 0.066, "loss_giou_0": 0.189, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.73, "loss_caption_0": 2.942, "loss_caption": 2.945, "total_loss": 14.596}, "86000": {"loss_ce": 0.297, "loss_counter": 0.118, "loss_bbox": 0.067, "loss_giou": 0.184, "loss_self_iou": 0.096, "cardinality_error": 3.764, "loss_ce_0": 0.298, 
"loss_counter_0": 0.118, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.764, "loss_caption_0": 2.989, "loss_caption": 2.988, "total_loss": 14.745}, "87000": {"loss_ce": 0.295, "loss_counter": 0.119, "loss_bbox": 0.067, "loss_giou": 0.178, "loss_self_iou": 0.096, "cardinality_error": 3.692, "loss_ce_0": 0.298, "loss_counter_0": 0.119, "loss_bbox_0": 0.068, "loss_giou_0": 0.182, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.692, "loss_caption_0": 2.93, "loss_caption": 2.931, "total_loss": 14.465}, "88000": {"loss_ce": 0.299, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.181, "loss_self_iou": 0.102, "cardinality_error": 3.74, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.07, "loss_giou_0": 0.184, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.74, "loss_caption_0": 2.945, "loss_caption": 2.939, "total_loss": 14.538}, "89000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.096, "cardinality_error": 3.911, "loss_ce_0": 0.303, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.188, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.911, "loss_caption_0": 2.981, "loss_caption": 2.985, "total_loss": 14.762}, "90000": {"loss_ce": 0.298, "loss_counter": 0.113, "loss_bbox": 0.066, "loss_giou": 0.174, "loss_self_iou": 0.099, "cardinality_error": 3.667, "loss_ce_0": 0.3, "loss_counter_0": 0.112, "loss_bbox_0": 0.067, "loss_giou_0": 0.177, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.667, "loss_caption_0": 2.946, "loss_caption": 2.945, "total_loss": 14.493}, "91000": {"loss_ce": 0.296, "loss_counter": 0.121, "loss_bbox": 0.066, "loss_giou": 0.179, "loss_self_iou": 0.097, "cardinality_error": 3.807, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.065, "loss_giou_0": 0.182, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.807, "loss_caption_0": 2.916, "loss_caption": 2.914, "total_loss": 14.411}, "92000": {"loss_ce": 0.298, "loss_counter": 0.121, "loss_bbox": 0.067, "loss_giou": 0.179, "loss_self_iou": 0.093, "cardinality_error": 3.784, "loss_ce_0": 0.298, "loss_counter_0": 0.121, "loss_bbox_0": 0.068, "loss_giou_0": 0.182, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.784, "loss_caption_0": 2.916, "loss_caption": 2.915, "total_loss": 14.422}, "93000": {"loss_ce": 0.298, "loss_counter": 0.117, "loss_bbox": 0.065, "loss_giou": 0.18, "loss_self_iou": 0.091, "cardinality_error": 3.806, "loss_ce_0": 0.3, "loss_counter_0": 0.117, "loss_bbox_0": 0.065, "loss_giou_0": 0.183, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.806, "loss_caption_0": 2.9, "loss_caption": 2.905, "total_loss": 14.377}, "94000": {"loss_ce": 0.293, "loss_counter": 0.109, "loss_bbox": 0.068, "loss_giou": 0.174, "loss_self_iou": 0.105, "cardinality_error": 3.616, "loss_ce_0": 0.293, "loss_counter_0": 0.109, "loss_bbox_0": 0.069, "loss_giou_0": 0.178, "loss_self_iou_0": 0.106, "cardinality_error_0": 3.616, "loss_caption_0": 2.912, "loss_caption": 2.914, "total_loss": 14.339}, "95000": {"loss_ce": 0.295, "loss_counter": 0.12, "loss_bbox": 0.066, "loss_giou": 0.185, "loss_self_iou": 0.093, "cardinality_error": 3.805, "loss_ce_0": 0.296, "loss_counter_0": 0.12, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.805, "loss_caption_0": 2.938, "loss_caption": 2.941, "total_loss": 14.546}, "96000": {"loss_ce": 0.292, "loss_counter": 0.114, "loss_bbox": 0.069, "loss_giou": 0.177, "loss_self_iou": 0.103, "cardinality_error": 
3.684, "loss_ce_0": 0.293, "loss_counter_0": 0.114, "loss_bbox_0": 0.07, "loss_giou_0": 0.181, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.684, "loss_caption_0": 2.928, "loss_caption": 2.931, "total_loss": 14.434}, "97000": {"loss_ce": 0.297, "loss_counter": 0.111, "loss_bbox": 0.066, "loss_giou": 0.184, "loss_self_iou": 0.095, "cardinality_error": 3.693, "loss_ce_0": 0.298, "loss_counter_0": 0.111, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.693, "loss_caption_0": 2.902, "loss_caption": 2.903, "total_loss": 14.392}, "98000": {"loss_ce": 0.296, "loss_counter": 0.115, "loss_bbox": 0.068, "loss_giou": 0.181, "loss_self_iou": 0.089, "cardinality_error": 3.738, "loss_ce_0": 0.298, "loss_counter_0": 0.115, "loss_bbox_0": 0.068, "loss_giou_0": 0.184, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.738, "loss_caption_0": 2.896, "loss_caption": 2.902, "total_loss": 14.361}, "99000": {"loss_ce": 0.295, "loss_counter": 0.115, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.095, "cardinality_error": 3.702, "loss_ce_0": 0.296, "loss_counter_0": 0.115, "loss_bbox_0": 0.065, "loss_giou_0": 0.177, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.702, "loss_caption_0": 2.956, "loss_caption": 2.956, "total_loss": 14.525}, "100000": {"loss_ce": 0.296, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.177, "loss_self_iou": 0.092, "cardinality_error": 3.751, "loss_ce_0": 0.298, "loss_counter_0": 0.113, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.751, "loss_caption_0": 2.932, "loss_caption": 2.932, "total_loss": 14.453}, "101000": {"loss_ce": 0.29, "loss_counter": 0.111, "loss_bbox": 0.065, "loss_giou": 0.173, "loss_self_iou": 0.093, "cardinality_error": 3.699, "loss_ce_0": 0.292, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.699, "loss_caption_0": 2.849, "loss_caption": 2.847, "total_loss": 14.064}, "102000": {"loss_ce": 0.292, "loss_counter": 0.116, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.093, "cardinality_error": 3.695, "loss_ce_0": 0.293, "loss_counter_0": 0.117, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.695, "loss_caption_0": 2.85, "loss_caption": 2.848, "total_loss": 14.087}, "103000": {"loss_ce": 0.293, "loss_counter": 0.115, "loss_bbox": 0.066, "loss_giou": 0.173, "loss_self_iou": 0.093, "cardinality_error": 3.724, "loss_ce_0": 0.293, "loss_counter_0": 0.116, "loss_bbox_0": 0.067, "loss_giou_0": 0.178, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.724, "loss_caption_0": 2.846, "loss_caption": 2.854, "total_loss": 14.092}, "104000": {"loss_ce": 0.289, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.178, "loss_self_iou": 0.097, "cardinality_error": 3.736, "loss_ce_0": 0.29, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.181, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.736, "loss_caption_0": 2.916, "loss_caption": 2.913, "total_loss": 14.362}, "105000": {"loss_ce": 0.288, "loss_counter": 0.117, "loss_bbox": 0.067, "loss_giou": 0.18, "loss_self_iou": 0.091, "cardinality_error": 3.736, "loss_ce_0": 0.29, "loss_counter_0": 0.116, "loss_bbox_0": 0.068, "loss_giou_0": 0.183, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.736, "loss_caption_0": 2.907, "loss_caption": 2.902, "total_loss": 14.342}, "106000": {"loss_ce": 0.292, "loss_counter": 0.113, "loss_bbox": 0.068, "loss_giou": 0.184, 
"loss_self_iou": 0.11, "cardinality_error": 3.775, "loss_ce_0": 0.293, "loss_counter_0": 0.112, "loss_bbox_0": 0.069, "loss_giou_0": 0.187, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.775, "loss_caption_0": 2.876, "loss_caption": 2.875, "total_loss": 14.264}, "107000": {"loss_ce": 0.291, "loss_counter": 0.114, "loss_bbox": 0.069, "loss_giou": 0.178, "loss_self_iou": 0.099, "cardinality_error": 3.743, "loss_ce_0": 0.291, "loss_counter_0": 0.114, "loss_bbox_0": 0.07, "loss_giou_0": 0.183, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.743, "loss_caption_0": 2.91, "loss_caption": 2.909, "total_loss": 14.358}, "108000": {"loss_ce": 0.295, "loss_counter": 0.118, "loss_bbox": 0.066, "loss_giou": 0.177, "loss_self_iou": 0.1, "cardinality_error": 3.81, "loss_ce_0": 0.296, "loss_counter_0": 0.117, "loss_bbox_0": 0.067, "loss_giou_0": 0.181, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.81, "loss_caption_0": 2.928, "loss_caption": 2.93, "total_loss": 14.446}, "109000": {"loss_ce": 0.294, "loss_counter": 0.118, "loss_bbox": 0.063, "loss_giou": 0.178, "loss_self_iou": 0.091, "cardinality_error": 3.78, "loss_ce_0": 0.296, "loss_counter_0": 0.117, "loss_bbox_0": 0.065, "loss_giou_0": 0.182, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.78, "loss_caption_0": 2.916, "loss_caption": 2.912, "total_loss": 14.396}, "110000": {"loss_ce": 0.297, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.178, "loss_self_iou": 0.087, "cardinality_error": 3.72, "loss_ce_0": 0.297, "loss_counter_0": 0.113, "loss_bbox_0": 0.065, "loss_giou_0": 0.184, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.72, "loss_caption_0": 2.948, "loss_caption": 2.948, "total_loss": 14.539}, "111000": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.173, "loss_self_iou": 0.095, "cardinality_error": 3.718, "loss_ce_0": 0.287, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.718, "loss_caption_0": 2.867, "loss_caption": 2.869, "total_loss": 14.14}, "112000": {"loss_ce": 0.287, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.725, "loss_ce_0": 0.289, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.725, "loss_caption_0": 2.844, "loss_caption": 2.842, "total_loss": 14.015}, "113000": {"loss_ce": 0.284, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.734, "loss_ce_0": 0.286, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.734, "loss_caption_0": 2.837, "loss_caption": 2.834, "total_loss": 13.981}, "114000": {"loss_ce": 0.283, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.096, "cardinality_error": 3.739, "loss_ce_0": 0.285, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.18, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.739, "loss_caption_0": 2.855, "loss_caption": 2.857, "total_loss": 14.084}, "115000": {"loss_ce": 0.284, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.175, "loss_self_iou": 0.092, "cardinality_error": 3.74, "loss_ce_0": 0.284, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.18, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.74, "loss_caption_0": 2.823, "loss_caption": 2.824, "total_loss": 13.959}, "116000": {"loss_ce": 0.286, "loss_counter": 0.113, "loss_bbox": 
0.065, "loss_giou": 0.177, "loss_self_iou": 0.088, "cardinality_error": 3.753, "loss_ce_0": 0.288, "loss_counter_0": 0.113, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.753, "loss_caption_0": 2.846, "loss_caption": 2.843, "total_loss": 14.073}, "117000": {"loss_ce": 0.285, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.096, "cardinality_error": 3.755, "loss_ce_0": 0.287, "loss_counter_0": 0.113, "loss_bbox_0": 0.064, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.755, "loss_caption_0": 2.804, "loss_caption": 2.81, "total_loss": 13.896}, "118000": {"loss_ce": 0.284, "loss_counter": 0.109, "loss_bbox": 0.066, "loss_giou": 0.175, "loss_self_iou": 0.093, "cardinality_error": 3.715, "loss_ce_0": 0.285, "loss_counter_0": 0.108, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.715, "loss_caption_0": 2.863, "loss_caption": 2.866, "total_loss": 14.129}, "119000": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.064, "loss_giou": 0.176, "loss_self_iou": 0.098, "cardinality_error": 3.735, "loss_ce_0": 0.287, "loss_counter_0": 0.114, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.735, "loss_caption_0": 2.844, "loss_caption": 2.843, "total_loss": 14.061}, "120000": {"loss_ce": 0.284, "loss_counter": 0.113, "loss_bbox": 0.065, "loss_giou": 0.175, "loss_self_iou": 0.101, "cardinality_error": 3.755, "loss_ce_0": 0.285, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.755, "loss_caption_0": 2.868, "loss_caption": 2.878, "total_loss": 14.168}, "121000": {"loss_ce": 0.283, "loss_counter": 0.108, "loss_bbox": 0.063, "loss_giou": 0.166, "loss_self_iou": 0.095, "cardinality_error": 3.691, "loss_ce_0": 0.284, "loss_counter_0": 0.108, "loss_bbox_0": 0.066, "loss_giou_0": 0.174, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.691, "loss_caption_0": 2.809, "loss_caption": 2.808, "total_loss": 13.835}, "122000": {"loss_ce": 0.28, "loss_counter": 0.109, "loss_bbox": 0.064, "loss_giou": 0.17, "loss_self_iou": 0.093, "cardinality_error": 3.706, "loss_ce_0": 0.281, "loss_counter_0": 0.108, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.706, "loss_caption_0": 2.811, "loss_caption": 2.814, "total_loss": 13.867}, "123000": {"loss_ce": 0.28, "loss_counter": 0.109, "loss_bbox": 0.066, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.691, "loss_ce_0": 0.281, "loss_counter_0": 0.11, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.691, "loss_caption_0": 2.789, "loss_caption": 2.797, "total_loss": 13.808}, "124000": {"loss_ce": 0.282, "loss_counter": 0.112, "loss_bbox": 0.063, "loss_giou": 0.17, "loss_self_iou": 0.092, "cardinality_error": 3.76, "loss_ce_0": 0.281, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.76, "loss_caption_0": 2.839, "loss_caption": 2.842, "total_loss": 13.984}, "125000": {"loss_ce": 0.281, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.097, "cardinality_error": 3.763, "loss_ce_0": 0.282, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.763, "loss_caption_0": 2.81, "loss_caption": 2.815, "total_loss": 13.898}, "126000": {"loss_ce": 0.282, 
"loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.177, "loss_self_iou": 0.095, "cardinality_error": 3.717, "loss_ce_0": 0.283, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.183, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.717, "loss_caption_0": 2.789, "loss_caption": 2.787, "total_loss": 13.835}, "127000": {"loss_ce": 0.277, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.764, "loss_ce_0": 0.277, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.764, "loss_caption_0": 2.867, "loss_caption": 2.871, "total_loss": 14.097}, "128000": {"loss_ce": 0.281, "loss_counter": 0.113, "loss_bbox": 0.063, "loss_giou": 0.173, "loss_self_iou": 0.092, "cardinality_error": 3.793, "loss_ce_0": 0.283, "loss_counter_0": 0.112, "loss_bbox_0": 0.064, "loss_giou_0": 0.179, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.793, "loss_caption_0": 2.868, "loss_caption": 2.863, "total_loss": 14.111}, "129000": {"loss_ce": 0.279, "loss_counter": 0.106, "loss_bbox": 0.066, "loss_giou": 0.175, "loss_self_iou": 0.1, "cardinality_error": 3.686, "loss_ce_0": 0.283, "loss_counter_0": 0.105, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.686, "loss_caption_0": 2.812, "loss_caption": 2.813, "total_loss": 13.903}, "130000": {"loss_ce": 0.283, "loss_counter": 0.111, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.097, "cardinality_error": 3.772, "loss_ce_0": 0.286, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.772, "loss_caption_0": 2.86, "loss_caption": 2.861, "total_loss": 14.105}, "131000": {"loss_ce": 0.277, "loss_counter": 0.107, "loss_bbox": 0.062, "loss_giou": 0.17, "loss_self_iou": 0.092, "cardinality_error": 3.75, "loss_ce_0": 0.279, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.75, "loss_caption_0": 2.817, "loss_caption": 2.826, "total_loss": 13.897}, "132000": {"loss_ce": 0.271, "loss_counter": 0.109, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.089, "cardinality_error": 3.814, "loss_ce_0": 0.274, "loss_counter_0": 0.109, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.814, "loss_caption_0": 2.778, "loss_caption": 2.776, "total_loss": 13.726}, "133000": {"loss_ce": 0.277, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.095, "cardinality_error": 3.773, "loss_ce_0": 0.277, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.773, "loss_caption_0": 2.843, "loss_caption": 2.843, "total_loss": 13.999}, "134000": {"loss_ce": 0.273, "loss_counter": 0.108, "loss_bbox": 0.065, "loss_giou": 0.171, "loss_self_iou": 0.101, "cardinality_error": 3.743, "loss_ce_0": 0.276, "loss_counter_0": 0.107, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.743, "loss_caption_0": 2.786, "loss_caption": 2.787, "total_loss": 13.756}, "135000": {"loss_ce": 0.28, "loss_counter": 0.115, "loss_bbox": 0.061, "loss_giou": 0.168, "loss_self_iou": 0.096, "cardinality_error": 3.794, "loss_ce_0": 0.281, "loss_counter_0": 0.115, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.794, "loss_caption_0": 2.785, "loss_caption": 2.784, "total_loss": 
13.759}, "136000": {"loss_ce": 0.279, "loss_counter": 0.106, "loss_bbox": 0.065, "loss_giou": 0.168, "loss_self_iou": 0.092, "cardinality_error": 3.653, "loss_ce_0": 0.279, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.175, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.653, "loss_caption_0": 2.828, "loss_caption": 2.834, "total_loss": 13.919}, "137000": {"loss_ce": 0.279, "loss_counter": 0.105, "loss_bbox": 0.065, "loss_giou": 0.173, "loss_self_iou": 0.099, "cardinality_error": 3.654, "loss_ce_0": 0.281, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.654, "loss_caption_0": 2.79, "loss_caption": 2.799, "total_loss": 13.806}, "138000": {"loss_ce": 0.278, "loss_counter": 0.109, "loss_bbox": 0.064, "loss_giou": 0.171, "loss_self_iou": 0.095, "cardinality_error": 3.714, "loss_ce_0": 0.28, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.714, "loss_caption_0": 2.835, "loss_caption": 2.828, "total_loss": 13.945}, "139000": {"loss_ce": 0.281, "loss_counter": 0.115, "loss_bbox": 0.062, "loss_giou": 0.167, "loss_self_iou": 0.098, "cardinality_error": 3.813, "loss_ce_0": 0.283, "loss_counter_0": 0.114, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.813, "loss_caption_0": 2.83, "loss_caption": 2.828, "total_loss": 13.924}, "140000": {"loss_ce": 0.277, "loss_counter": 0.107, "loss_bbox": 0.063, "loss_giou": 0.171, "loss_self_iou": 0.09, "cardinality_error": 3.664, "loss_ce_0": 0.28, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.664, "loss_caption_0": 2.821, "loss_caption": 2.823, "total_loss": 13.905}, "141000": {"loss_ce": 0.268, "loss_counter": 0.108, "loss_bbox": 0.066, "loss_giou": 0.171, "loss_self_iou": 0.106, "cardinality_error": 3.774, "loss_ce_0": 0.27, "loss_counter_0": 0.108, "loss_bbox_0": 0.067, "loss_giou_0": 0.177, "loss_self_iou_0": 0.108, "cardinality_error_0": 3.774, "loss_caption_0": 2.75, "loss_caption": 2.748, "total_loss": 13.572}, "142000": {"loss_ce": 0.27, "loss_counter": 0.109, "loss_bbox": 0.062, "loss_giou": 0.173, "loss_self_iou": 0.091, "cardinality_error": 3.797, "loss_ce_0": 0.272, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.181, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.797, "loss_caption_0": 2.72, "loss_caption": 2.722, "total_loss": 13.492}, "143000": {"loss_ce": 0.265, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.162, "loss_self_iou": 0.095, "cardinality_error": 3.637, "loss_ce_0": 0.268, "loss_counter_0": 0.1, "loss_bbox_0": 0.066, "loss_giou_0": 0.171, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.637, "loss_caption_0": 2.782, "loss_caption": 2.782, "total_loss": 13.626}, "144000": {"loss_ce": 0.27, "loss_counter": 0.112, "loss_bbox": 0.062, "loss_giou": 0.172, "loss_self_iou": 0.094, "cardinality_error": 3.831, "loss_ce_0": 0.273, "loss_counter_0": 0.112, "loss_bbox_0": 0.064, "loss_giou_0": 0.18, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.831, "loss_caption_0": 2.793, "loss_caption": 2.79, "total_loss": 13.773}, "145000": {"loss_ce": 0.269, "loss_counter": 0.101, "loss_bbox": 0.061, "loss_giou": 0.16, "loss_self_iou": 0.093, "cardinality_error": 3.665, "loss_ce_0": 0.273, "loss_counter_0": 0.101, "loss_bbox_0": 0.063, "loss_giou_0": 0.168, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.665, "loss_caption_0": 2.762, 
"loss_caption": 2.767, "total_loss": 13.554}, "146000": {"loss_ce": 0.275, "loss_counter": 0.109, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.091, "cardinality_error": 3.725, "loss_ce_0": 0.276, "loss_counter_0": 0.109, "loss_bbox_0": 0.064, "loss_giou_0": 0.172, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.725, "loss_caption_0": 2.813, "loss_caption": 2.813, "total_loss": 13.811}, "147000": {"loss_ce": 0.272, "loss_counter": 0.104, "loss_bbox": 0.063, "loss_giou": 0.171, "loss_self_iou": 0.097, "cardinality_error": 3.714, "loss_ce_0": 0.273, "loss_counter_0": 0.103, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.714, "loss_caption_0": 2.747, "loss_caption": 2.745, "total_loss": 13.578}, "148000": {"loss_ce": 0.271, "loss_counter": 0.108, "loss_bbox": 0.063, "loss_giou": 0.168, "loss_self_iou": 0.096, "cardinality_error": 3.728, "loss_ce_0": 0.274, "loss_counter_0": 0.107, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.728, "loss_caption_0": 2.843, "loss_caption": 2.84, "total_loss": 13.944}, "149000": {"loss_ce": 0.269, "loss_counter": 0.108, "loss_bbox": 0.066, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.799, "loss_ce_0": 0.273, "loss_counter_0": 0.109, "loss_bbox_0": 0.068, "loss_giou_0": 0.178, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.799, "loss_caption_0": 2.836, "loss_caption": 2.836, "total_loss": 13.926}, "150000": {"loss_ce": 0.27, "loss_counter": 0.107, "loss_bbox": 0.063, "loss_giou": 0.169, "loss_self_iou": 0.087, "cardinality_error": 3.703, "loss_ce_0": 0.272, "loss_counter_0": 0.107, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.703, "loss_caption_0": 2.806, "loss_caption": 2.806, "total_loss": 13.795}, "151000": {"loss_ce": 0.264, "loss_counter": 0.101, "loss_bbox": 0.063, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.645, "loss_ce_0": 0.266, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.171, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.645, "loss_caption_0": 2.762, "loss_caption": 2.759, "total_loss": 13.537}, "152000": {"loss_ce": 0.265, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.166, "loss_self_iou": 0.087, "cardinality_error": 3.722, "loss_ce_0": 0.269, "loss_counter_0": 0.103, "loss_bbox_0": 0.063, "loss_giou_0": 0.175, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.722, "loss_caption_0": 2.762, "loss_caption": 2.766, "total_loss": 13.59}, "153000": {"loss_ce": 0.264, "loss_counter": 0.111, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.083, "cardinality_error": 3.813, "loss_ce_0": 0.267, "loss_counter_0": 0.111, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.085, "cardinality_error_0": 3.813, "loss_caption_0": 2.777, "loss_caption": 2.778, "total_loss": 13.663}, "154000": {"loss_ce": 0.268, "loss_counter": 0.106, "loss_bbox": 0.061, "loss_giou": 0.168, "loss_self_iou": 0.092, "cardinality_error": 3.769, "loss_ce_0": 0.272, "loss_counter_0": 0.105, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.769, "loss_caption_0": 2.787, "loss_caption": 2.787, "total_loss": 13.717}, "155000": {"loss_ce": 0.264, "loss_counter": 0.104, "loss_bbox": 0.063, "loss_giou": 0.169, "loss_self_iou": 0.09, "cardinality_error": 3.714, "loss_ce_0": 0.267, "loss_counter_0": 0.104, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.091, 
"cardinality_error_0": 3.714, "loss_caption_0": 2.758, "loss_caption": 2.76, "total_loss": 13.593}, "156000": {"loss_ce": 0.265, "loss_counter": 0.106, "loss_bbox": 0.064, "loss_giou": 0.167, "loss_self_iou": 0.102, "cardinality_error": 3.675, "loss_ce_0": 0.269, "loss_counter_0": 0.106, "loss_bbox_0": 0.066, "loss_giou_0": 0.174, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.675, "loss_caption_0": 2.741, "loss_caption": 2.742, "total_loss": 13.504}, "157000": {"loss_ce": 0.267, "loss_counter": 0.104, "loss_bbox": 0.065, "loss_giou": 0.167, "loss_self_iou": 0.103, "cardinality_error": 3.722, "loss_ce_0": 0.268, "loss_counter_0": 0.104, "loss_bbox_0": 0.068, "loss_giou_0": 0.176, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.722, "loss_caption_0": 2.777, "loss_caption": 2.783, "total_loss": 13.668}, "158000": {"loss_ce": 0.266, "loss_counter": 0.106, "loss_bbox": 0.062, "loss_giou": 0.164, "loss_self_iou": 0.099, "cardinality_error": 3.758, "loss_ce_0": 0.27, "loss_counter_0": 0.106, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.758, "loss_caption_0": 2.815, "loss_caption": 2.817, "total_loss": 13.789}, "159000": {"loss_ce": 0.272, "loss_counter": 0.108, "loss_bbox": 0.062, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.729, "loss_ce_0": 0.275, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.177, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.729, "loss_caption_0": 2.783, "loss_caption": 2.785, "total_loss": 13.721}, "160000": {"loss_ce": 0.269, "loss_counter": 0.109, "loss_bbox": 0.063, "loss_giou": 0.166, "loss_self_iou": 0.098, "cardinality_error": 3.816, "loss_ce_0": 0.271, "loss_counter_0": 0.109, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.816, "loss_caption_0": 2.78, "loss_caption": 2.784, "total_loss": 13.686}, "161000": {"loss_ce": 0.26, "loss_counter": 0.103, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.695, "loss_ce_0": 0.263, "loss_counter_0": 0.103, "loss_bbox_0": 0.063, "loss_giou_0": 0.171, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.695, "loss_caption_0": 2.766, "loss_caption": 2.768, "total_loss": 13.553}, "162000": {"loss_ce": 0.262, "loss_counter": 0.103, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.091, "cardinality_error": 3.694, "loss_ce_0": 0.266, "loss_counter_0": 0.103, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.694, "loss_caption_0": 2.768, "loss_caption": 2.764, "total_loss": 13.573}, "163000": {"loss_ce": 0.262, "loss_counter": 0.105, "loss_bbox": 0.064, "loss_giou": 0.173, "loss_self_iou": 0.097, "cardinality_error": 3.769, "loss_ce_0": 0.266, "loss_counter_0": 0.104, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.769, "loss_caption_0": 2.765, "loss_caption": 2.766, "total_loss": 13.63}, "164000": {"loss_ce": 0.265, "loss_counter": 0.11, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.092, "cardinality_error": 3.774, "loss_ce_0": 0.269, "loss_counter_0": 0.11, "loss_bbox_0": 0.063, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.774, "loss_caption_0": 2.772, "loss_caption": 2.776, "total_loss": 13.625}, "165000": {"loss_ce": 0.264, "loss_counter": 0.102, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.092, "cardinality_error": 3.699, "loss_ce_0": 0.267, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, 
"loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.699, "loss_caption_0": 2.711, "loss_caption": 2.716, "total_loss": 13.368}, "166000": {"loss_ce": 0.264, "loss_counter": 0.105, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.094, "cardinality_error": 3.72, "loss_ce_0": 0.268, "loss_counter_0": 0.105, "loss_bbox_0": 0.063, "loss_giou_0": 0.174, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.72, "loss_caption_0": 2.754, "loss_caption": 2.755, "total_loss": 13.534}, "167000": {"loss_ce": 0.261, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.095, "cardinality_error": 3.712, "loss_ce_0": 0.266, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.712, "loss_caption_0": 2.771, "loss_caption": 2.772, "total_loss": 13.617}, "168000": {"loss_ce": 0.265, "loss_counter": 0.108, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.09, "cardinality_error": 3.816, "loss_ce_0": 0.269, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.816, "loss_caption_0": 2.814, "loss_caption": 2.82, "total_loss": 13.826}, "169000": {"loss_ce": 0.258, "loss_counter": 0.106, "loss_bbox": 0.064, "loss_giou": 0.166, "loss_self_iou": 0.106, "cardinality_error": 3.697, "loss_ce_0": 0.261, "loss_counter_0": 0.106, "loss_bbox_0": 0.067, "loss_giou_0": 0.176, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.697, "loss_caption_0": 2.769, "loss_caption": 2.775, "total_loss": 13.598}, "170000": {"loss_ce": 0.268, "loss_counter": 0.105, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.093, "cardinality_error": 3.799, "loss_ce_0": 0.272, "loss_counter_0": 0.106, "loss_bbox_0": 0.064, "loss_giou_0": 0.174, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.799, "loss_caption_0": 2.794, "loss_caption": 2.798, "total_loss": 13.727}, "171000": {"loss_ce": 0.256, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.161, "loss_self_iou": 0.094, "cardinality_error": 3.694, "loss_ce_0": 0.261, "loss_counter_0": 0.101, "loss_bbox_0": 0.064, "loss_giou_0": 0.169, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.694, "loss_caption_0": 2.772, "loss_caption": 2.77, "total_loss": 13.544}, "172000": {"loss_ce": 0.258, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.165, "loss_self_iou": 0.096, "cardinality_error": 3.667, "loss_ce_0": 0.262, "loss_counter_0": 0.1, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.667, "loss_caption_0": 2.741, "loss_caption": 2.743, "total_loss": 13.47}, "173000": {"loss_ce": 0.258, "loss_counter": 0.104, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.09, "cardinality_error": 3.753, "loss_ce_0": 0.261, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.753, "loss_caption_0": 2.786, "loss_caption": 2.785, "total_loss": 13.646}, "174000": {"loss_ce": 0.259, "loss_counter": 0.107, "loss_bbox": 0.06, "loss_giou": 0.166, "loss_self_iou": 0.094, "cardinality_error": 3.832, "loss_ce_0": 0.261, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.832, "loss_caption_0": 2.733, "loss_caption": 2.738, "total_loss": 13.457}, "175000": {"loss_ce": 0.255, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.163, "loss_self_iou": 0.098, "cardinality_error": 3.731, "loss_ce_0": 0.259, 
"loss_counter_0": 0.103, "loss_bbox_0": 0.062, "loss_giou_0": 0.173, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.731, "loss_caption_0": 2.745, "loss_caption": 2.744, "total_loss": 13.454}, "176000": {"loss_ce": 0.261, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.095, "cardinality_error": 3.795, "loss_ce_0": 0.264, "loss_counter_0": 0.103, "loss_bbox_0": 0.064, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.795, "loss_caption_0": 2.761, "loss_caption": 2.77, "total_loss": 13.575}, "177000": {"loss_ce": 0.255, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.161, "loss_self_iou": 0.096, "cardinality_error": 3.652, "loss_ce_0": 0.261, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.169, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.652, "loss_caption_0": 2.743, "loss_caption": 2.745, "total_loss": 13.43}, "178000": {"loss_ce": 0.255, "loss_counter": 0.103, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.103, "cardinality_error": 3.664, "loss_ce_0": 0.26, "loss_counter_0": 0.102, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.664, "loss_caption_0": 2.682, "loss_caption": 2.68, "total_loss": 13.211}, "179000": {"loss_ce": 0.261, "loss_counter": 0.105, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.09, "cardinality_error": 3.825, "loss_ce_0": 0.266, "loss_counter_0": 0.104, "loss_bbox_0": 0.063, "loss_giou_0": 0.173, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.825, "loss_caption_0": 2.788, "loss_caption": 2.796, "total_loss": 13.671}, "180000": {"loss_ce": 0.255, "loss_counter": 0.102, "loss_bbox": 0.064, "loss_giou": 0.166, "loss_self_iou": 0.093, "cardinality_error": 3.729, "loss_ce_0": 0.261, "loss_counter_0": 0.102, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.729, "loss_caption_0": 2.781, "loss_caption": 2.775, "total_loss": 13.608}, "181000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.094, "cardinality_error": 3.781, "loss_ce_0": 0.261, "loss_counter_0": 0.102, "loss_bbox_0": 0.063, "loss_giou_0": 0.172, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.781, "loss_caption_0": 2.743, "loss_caption": 2.746, "total_loss": 13.452}, "182000": {"loss_ce": 0.255, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.164, "loss_self_iou": 0.1, "cardinality_error": 3.726, "loss_ce_0": 0.26, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.726, "loss_caption_0": 2.748, "loss_caption": 2.746, "total_loss": 13.472}, "183000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.722, "loss_ce_0": 0.26, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.722, "loss_caption_0": 2.729, "loss_caption": 2.734, "total_loss": 13.405}, "184000": {"loss_ce": 0.253, "loss_counter": 0.104, "loss_bbox": 0.061, "loss_giou": 0.161, "loss_self_iou": 0.098, "cardinality_error": 3.726, "loss_ce_0": 0.257, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.17, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.726, "loss_caption_0": 2.783, "loss_caption": 2.787, "total_loss": 13.591}, "185000": {"loss_ce": 0.255, "loss_counter": 0.098, "loss_bbox": 0.063, "loss_giou": 0.165, "loss_self_iou": 0.087, "cardinality_error": 
3.667, "loss_ce_0": 0.26, "loss_counter_0": 0.098, "loss_bbox_0": 0.065, "loss_giou_0": 0.175, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.667, "loss_caption_0": 2.718, "loss_caption": 2.716, "total_loss": 13.354}, "186000": {"loss_ce": 0.254, "loss_counter": 0.099, "loss_bbox": 0.062, "loss_giou": 0.166, "loss_self_iou": 0.093, "cardinality_error": 3.776, "loss_ce_0": 0.259, "loss_counter_0": 0.099, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.776, "loss_caption_0": 2.75, "loss_caption": 2.75, "total_loss": 13.494}, "187000": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.089, "cardinality_error": 3.803, "loss_ce_0": 0.264, "loss_counter_0": 0.109, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.803, "loss_caption_0": 2.788, "loss_caption": 2.791, "total_loss": 13.678}, "188000": {"loss_ce": 0.253, "loss_counter": 0.1, "loss_bbox": 0.062, "loss_giou": 0.163, "loss_self_iou": 0.091, "cardinality_error": 3.71, "loss_ce_0": 0.259, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.71, "loss_caption_0": 2.745, "loss_caption": 2.743, "total_loss": 13.444}, "189000": {"loss_ce": 0.25, "loss_counter": 0.105, "loss_bbox": 0.064, "loss_giou": 0.165, "loss_self_iou": 0.1, "cardinality_error": 3.748, "loss_ce_0": 0.256, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.175, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.748, "loss_caption_0": 2.751, "loss_caption": 2.753, "total_loss": 13.484}, "190000": {"loss_ce": 0.257, "loss_counter": 0.104, "loss_bbox": 0.06, "loss_giou": 0.161, "loss_self_iou": 0.098, "cardinality_error": 3.742, "loss_ce_0": 0.264, "loss_counter_0": 0.104, "loss_bbox_0": 0.063, "loss_giou_0": 0.172, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.742, "loss_caption_0": 2.729, "loss_caption": 2.73, "total_loss": 13.395}, "191000": {"loss_ce": 0.251, "loss_counter": 0.099, "loss_bbox": 0.062, "loss_giou": 0.167, "loss_self_iou": 0.086, "cardinality_error": 3.653, "loss_ce_0": 0.257, "loss_counter_0": 0.099, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.653, "loss_caption_0": 2.754, "loss_caption": 2.752, "total_loss": 13.501}, "192000": {"loss_ce": 0.252, "loss_counter": 0.1, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.094, "cardinality_error": 3.767, "loss_ce_0": 0.258, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.767, "loss_caption_0": 2.717, "loss_caption": 2.72, "total_loss": 13.343}, "193000": {"loss_ce": 0.25, "loss_counter": 0.106, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.093, "cardinality_error": 3.847, "loss_ce_0": 0.256, "loss_counter_0": 0.105, "loss_bbox_0": 0.063, "loss_giou_0": 0.174, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.847, "loss_caption_0": 2.754, "loss_caption": 2.759, "total_loss": 13.499}, "194000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.165, "loss_self_iou": 0.097, "cardinality_error": 3.775, "loss_ce_0": 0.262, "loss_counter_0": 0.102, "loss_bbox_0": 0.063, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.775, "loss_caption_0": 2.769, "loss_caption": 2.772, "total_loss": 13.587}, "195000": {"loss_ce": 0.257, "loss_counter": 0.106, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 
0.089, "cardinality_error": 3.794, "loss_ce_0": 0.261, "loss_counter_0": 0.105, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.794, "loss_caption_0": 2.751, "loss_caption": 2.751, "total_loss": 13.506}, "196000": {"loss_ce": 0.251, "loss_counter": 0.095, "loss_bbox": 0.061, "loss_giou": 0.162, "loss_self_iou": 0.1, "cardinality_error": 3.652, "loss_ce_0": 0.258, "loss_counter_0": 0.095, "loss_bbox_0": 0.064, "loss_giou_0": 0.172, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.652, "loss_caption_0": 2.743, "loss_caption": 2.735, "total_loss": 13.403}, "197000": {"loss_ce": 0.251, "loss_counter": 0.104, "loss_bbox": 0.061, "loss_giou": 0.162, "loss_self_iou": 0.091, "cardinality_error": 3.759, "loss_ce_0": 0.258, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.171, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.759, "loss_caption_0": 2.74, "loss_caption": 2.743, "total_loss": 13.418}, "198000": {"loss_ce": 0.249, "loss_counter": 0.098, "loss_bbox": 0.062, "loss_giou": 0.162, "loss_self_iou": 0.092, "cardinality_error": 3.664, "loss_ce_0": 0.255, "loss_counter_0": 0.098, "loss_bbox_0": 0.064, "loss_giou_0": 0.171, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.664, "loss_caption_0": 2.718, "loss_caption": 2.72, "total_loss": 13.31}, "199000": {"loss_ce": 0.252, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.162, "loss_self_iou": 0.101, "cardinality_error": 3.736, "loss_ce_0": 0.257, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.736, "loss_caption_0": 2.759, "loss_caption": 2.76, "total_loss": 13.502}, "200000": {"loss_ce": 0.253, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.159, "loss_self_iou": 0.098, "cardinality_error": 3.701, "loss_ce_0": 0.259, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.17, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.701, "loss_caption_0": 2.766, "loss_caption": 2.771, "total_loss": 13.518}}, "lr_history": {"1000": 5e-05, "2000": 5e-05, "3000": 5e-05, "4000": 5e-05, "5000": 5e-05, "6000": 5e-05, "7000": 5e-05, "8000": 5e-05, "9000": 5e-05, "10000": 5e-05, "11000": 5e-05, "12000": 5e-05, "13000": 5e-05, "14000": 5e-05, "15000": 5e-05, "16000": 5e-05, "17000": 5e-05, "18000": 5e-05, "19000": 5e-05, "20000": 5e-05, "21000": 5e-05, "22000": 5e-05, "23000": 5e-05, "24000": 5e-05, "25000": 5e-05, "26000": 5e-05, "27000": 5e-05, "28000": 5e-05, "29000": 5e-05, "30000": 5e-05, "31000": 5e-05, "32000": 5e-05, "33000": 5e-05, "34000": 5e-05, "35000": 5e-05, "36000": 5e-05, "37000": 5e-05, "38000": 5e-05, "39000": 5e-05, "40000": 5e-05, "41000": 5e-05, "42000": 5e-05, "43000": 5e-05, "44000": 5e-05, "45000": 5e-05, "46000": 5e-05, "47000": 5e-05, "48000": 5e-05, "49000": 5e-05, "50000": 5e-05, "51000": 5e-05, "52000": 5e-05, "53000": 5e-05, "54000": 5e-05, "55000": 5e-05, "56000": 5e-05, "57000": 5e-05, "58000": 5e-05, "59000": 5e-05, "60000": 5e-05, "61000": 5e-05, "62000": 5e-05, "63000": 5e-05, "64000": 5e-05, "65000": 5e-05, "66000": 5e-05, "67000": 5e-05, "68000": 5e-05, "69000": 5e-05, "70000": 5e-05, "71000": 5e-05, "72000": 5e-05, "73000": 5e-05, "74000": 5e-05, "75000": 5e-05, "76000": 5e-05, "77000": 5e-05, "78000": 5e-05, "79000": 5e-05, "80000": 5e-05, "81000": 2.5e-05, "82000": 2.5e-05, "83000": 2.5e-05, "84000": 2.5e-05, "85000": 2.5e-05, "86000": 2.5e-05, "87000": 2.5e-05, "88000": 2.5e-05, "89000": 2.5e-05, "90000": 2.5e-05, "91000": 2.5e-05, "92000": 2.5e-05, 
"93000": 2.5e-05, "94000": 2.5e-05, "95000": 2.5e-05, "96000": 2.5e-05, "97000": 2.5e-05, "98000": 2.5e-05, "99000": 2.5e-05, "100000": 2.5e-05, "101000": 2.5e-05, "102000": 2.5e-05, "103000": 2.5e-05, "104000": 2.5e-05, "105000": 2.5e-05, "106000": 2.5e-05, "107000": 2.5e-05, "108000": 2.5e-05, "109000": 2.5e-05, "110000": 2.5e-05, "111000": 1.25e-05, "112000": 1.25e-05, "113000": 1.25e-05, "114000": 1.25e-05, "115000": 1.25e-05, "116000": 1.25e-05, "117000": 1.25e-05, "118000": 1.25e-05, "119000": 1.25e-05, "120000": 1.25e-05, "121000": 1.25e-05, "122000": 1.25e-05, "123000": 1.25e-05, "124000": 1.25e-05, "125000": 1.25e-05, "126000": 1.25e-05, "127000": 1.25e-05, "128000": 1.25e-05, "129000": 1.25e-05, "130000": 1.25e-05, "131000": 1.25e-05, "132000": 1.25e-05, "133000": 1.25e-05, "134000": 1.25e-05, "135000": 1.25e-05, "136000": 1.25e-05, "137000": 1.25e-05, "138000": 1.25e-05, "139000": 1.25e-05, "140000": 1.25e-05, "141000": 6.25e-06, "142000": 6.25e-06, "143000": 6.25e-06, "144000": 6.25e-06, "145000": 6.25e-06, "146000": 6.25e-06, "147000": 6.25e-06, "148000": 6.25e-06, "149000": 6.25e-06, "150000": 6.25e-06, "151000": 6.25e-06, "152000": 6.25e-06, "153000": 6.25e-06, "154000": 6.25e-06, "155000": 6.25e-06, "156000": 6.25e-06, "157000": 6.25e-06, "158000": 6.25e-06, "159000": 6.25e-06, "160000": 6.25e-06, "161000": 6.25e-06, "162000": 6.25e-06, "163000": 6.25e-06, "164000": 6.25e-06, "165000": 6.25e-06, "166000": 6.25e-06, "167000": 6.25e-06, "168000": 6.25e-06, "169000": 6.25e-06, "170000": 6.25e-06, "171000": 3.125e-06, "172000": 3.125e-06, "173000": 3.125e-06, "174000": 3.125e-06, "175000": 3.125e-06, "176000": 3.125e-06, "177000": 3.125e-06, "178000": 3.125e-06, "179000": 3.125e-06, "180000": 3.125e-06, "181000": 3.125e-06, "182000": 3.125e-06, "183000": 3.125e-06, "184000": 3.125e-06, "185000": 3.125e-06, "186000": 3.125e-06, "187000": 3.125e-06, "188000": 3.125e-06, "189000": 3.125e-06, "190000": 3.125e-06, "191000": 3.125e-06, "192000": 3.125e-06, "193000": 3.125e-06, "194000": 3.125e-06, "195000": 3.125e-06, "196000": 3.125e-06, "197000": 3.125e-06, "198000": 3.125e-06, "199000": 3.125e-06, "200000": 3.125e-06}}, "eval_history": {}} \ No newline at end of file diff --git a/anet_clip/model-best.pth b/anet_clip/model-best.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e4a90dfdd7212286f1fa03c604791bc873dd013 --- /dev/null +++ b/anet_clip/model-best.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a1005b39a23d6a3c9bb047e210d4dff71adb571bba0b1e25498705a7d7b56c +size 397662145 diff --git a/anet_clip/model-last.pth b/anet_clip/model-last.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e4a90dfdd7212286f1fa03c604791bc873dd013 --- /dev/null +++ b/anet_clip/model-last.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a1005b39a23d6a3c9bb047e210d4dff71adb571bba0b1e25498705a7d7b56c +size 397662145 diff --git a/anet_clip/tf_summary/events.out.tfevents.1710744132.dlc1fj0sg6kl2yx3-master-0 b/anet_clip/tf_summary/events.out.tfevents.1710744132.dlc1fj0sg6kl2yx3-master-0 new file mode 100644 index 0000000000000000000000000000000000000000..07e67491b1b155acb14936e9a4a779d873fa5fd9 --- /dev/null +++ b/anet_clip/tf_summary/events.out.tfevents.1710744132.dlc1fj0sg6kl2yx3-master-0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9041922000c6e059adc92858535f91dd695cc35eb5dae7c5d5d47360108999 +size 180967 diff --git a/anet_clip/train.log b/anet_clip/train.log new file 
mode 100644 index 0000000000000000000000000000000000000000..765923c87c550a2ac7c30e84294d23b156013f81 --- /dev/null +++ b/anet_clip/train.log @@ -0,0 +1,1520 @@ +backup environment completed ! +Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal + + +******************** All args: ************************************************* +align_contiguous = False +align_drop_z = 0 +align_keep_percentile = 0.1 +align_many_to_one = False +align_one_to_many = False +align_top_band_size = 0 +att_hid_size = 512 +aux_loss = True +backbone = None +base_cfg_path = cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml +basic_ss_prob = 0 +batch_size = 1 +batch_size_for_eval = 1 +bbox_loss_coef = 0 +beta = 1 +cap_dec_n_points = 4 +cap_nheads = 1 +cap_num_feature_levels = 4 +cap_prob_clip = False +caption_cost_type = loss +caption_decoder_type = standard +caption_loss_coef = 2 +cfg_path = cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +cl_schedule_time = [0, 2] +cl_schedule_val = [0, 0.1] +clip_context_dim = 512 +cls_loss_coef = 2 +contrastive_hidden_size = 128 +contrastive_loss_start_coef = 0.0 +contrastive_loss_temperature = 0.1 +cost_alpha = 0.25 +cost_gamma = 2 +count_loss_coef = 0.5 +criteria_for_best_ckpt = overall +current_lr = 5e-05 +data_norm = 0 +data_rescale = 1 +debug = False +dec_layers = 2 +dec_n_points = 4 +device = cuda +dict_file = data/howto/vocabulary_howto_rate2_anet.json +dict_file_val = data/howto/vocabulary_howto_rate2_anet.json +dilation = False +disable_contrastive_projection = 1 +disable_cudnn = 0 +disable_mid_caption_heads = False +disable_rematch = False +disable_tqdm = False +drop_prob = 0.5 +ec_alpha = 1.0 +enable_bg_for_cl = True +enable_contrastive = False +enable_cross_video_cl = True +enable_e2t_cl = True +enc_layers = 2 +enc_n_points = 4 +eos_coef = 0.1 +epoch = 20 +eval_proposal_file = data/generated_proposals/dbg_trainval_top100.json +event_context_dim = None +feature_dim = 768 +feature_sample_rate = 1 +fix_xcw = 1 +focal_alpha = 0.25 +focal_gamma = 2.0 +focal_mil = False +frame_embedding_num = 100 +ft_gt_percent = 1.0 +giou_loss_coef = 4 +gpu_id = [] +grad_clip = 100.0 +gt_file_for_auc = data/anet/captiondata/val_all.json +gt_file_for_eval = ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval = ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +gt_proposal_sample_num = 20 +hidden_dim = 512 +hidden_dropout_prob = 0.5 +huggingface_cache_dir = .cache +id = seq2-ft(mix)-gt_percent-1.0 +id_ori = +input_encoding_size = 512 +invalid_video_json = [] +iteration = 3 +layer_norm_eps = 1e-12 +learning_rate_decay_every = 3 +learning_rate_decay_rate = 0.5 +learning_rate_decay_start = 8 +lloss_beta = 1 +lloss_cross_entropy = 0 +lloss_focal_loss = 0 +lloss_gau_mask = 1 +lr = 5e-05 +lr_backbone = 2e-05 +lr_backbone_names = ['None'] +lr_linear_proj_mult = 0.1 +lr_linear_proj_names = ['reference_points', 'sampling_offsets'] +lr_proj = 0 +map = True +matcher_type = default +max_caption_len = 50 +max_eseq_length = 10 +max_pos_num = 500 +max_text_input_len = 32 +merge_criterion = ins_cap_topk +merge_k_boxes = 3 +merge_mode = weighted_sum +mil_loss_coef = 0 +min_epoch_when_save = -1 +nheads = 8 +norm_ins_score = sigmoid +nthreads = 4 +num_classes = 1 
+num_feature_levels = 4 +num_layers = 1 +num_neg_box = 10 +num_queries = 100 +optimizer_type = adam +position_embedding = sine +position_embedding_scale = 6.283185307179586 +pre_percent = 1.0 +pretrain = None +pretrain_path = +pretrained_language_model = CLIP +prior_anchor_duration_init = True +prior_manner = all +pseudo_box_aug = False +pseudo_box_aug_mode = random_range +pseudo_box_aug_num = 8 +pseudo_box_aug_ratio = 0.02 +pseudo_box_type = similarity_op_order_v2 +random_anchor_init = True +random_seed = False +ref_rank_loss_coef = 0.0 +refine_pseudo_box = False +refine_pseudo_stage_num = 2 +rnn_size = 512 +sample_method = nearest +save_all_checkpoint = 0 +save_checkpoint_every = 1 +save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs +scheduled_sampling_increase_every = 2 +scheduled_sampling_increase_prob = 0.05 +scheduled_sampling_max_prob = 0.25 +scheduled_sampling_start = -1 +seed = 777 +self_iou_loss_coef = 0.0 +set_cost_bbox = 0 +set_cost_caption = 0 +set_cost_cl = 0.0 +set_cost_class = 2 +set_cost_giou = 4 +set_cost_sim = 1.0 +share_caption_head = 1 +soft_attention = 1 +start_from = +start_from_mode = last +start_refine_epoch = -1 +statistic_mode = mode +text_encoder_learning_strategy = frozen +text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] +text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] +text_hidden_dim = 768 +top_frames = 30 +train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] +train_proposal_file = data/generated_proposals/dbg_trainval_top100.json +train_proposal_sample_num = 30 +train_proposal_type = gt +training_scheme = all +transformer_dropout_prob = 0.1 +transformer_ff_dim = 512 +transformer_input_type = queries +use_additional_cap_layer = False +use_additional_score_layer = False +use_anchor = 0 +use_neg_pseudo_box = False +use_pseudo_box = False +use_query_box_for_refine = 0 +val_caption_file = data/anet/captiondata/val_1.json +visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] +visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] +visual_feature_type = ['CLIP'] +vocab_size = 16221 +vocab_size_val = 16221 +weight_decay = 0.0001 +weighted_mil_loss = False +width_ratio = 1 +width_th = 1 +window_size = 2 +with_box_refine = 1 +wordRNN_input_feats_type = C + + +******************** Model structure: ****************************************** +PDVC( + (base_encoder): BaseEncoder( + (pos_embed): PositionEmbeddingSine( + (duration_embed_layer): Linear(in_features=256, out_features=256, bias=True) + ) + (input_proj): ModuleList( + (0): Sequential( + (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (1): Sequential( + (0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (2): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (3): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + ) + ) + (transformer): DeformableTransformer( + (encoder): DeformableTransformerEncoder( + (layers): ModuleList( + (0): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + 
(sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (decoder): DeformableTransformerDecoder( + (layers): ModuleList( + (0): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + (bbox_head): 
ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + ) + (pos_trans): Linear(in_features=512, out_features=1024, bias=True) + (pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (reference_points): Linear(in_features=512, out_features=1, bias=True) + ) + (caption_head): ModuleList( + (0): LSTMDSACaptioner( + (embed): Embedding(16222, 512) + (logit): Linear(in_features=512, out_features=16222, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + (1): LSTMDSACaptioner( + (embed): Embedding(16222, 512) + (logit): Linear(in_features=512, out_features=16222, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + ) + (query_embed): Embedding(100, 1024) + (class_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (class_refine_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (count_head): ModuleList( + (0): Linear(in_features=512, out_features=11, bias=True) + (1): Linear(in_features=512, out_features=11, bias=True) + ) + (bbox_head): ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + (contrastive_projection_event): ModuleList( + (0): Identity() + (1): Identity() + ) + (contrastive_projection_text): ModuleList( + (0): Identity() + (1): Identity() + ) +) + + +******************** Start training ! 
****************************************** +loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) +loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) +ID seq2-ft(mix)-gt_percent-1.0 iter 1000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.126), ('loss_bbox', 0.117), ('loss_giou', 0.275), ('loss_self_iou', 0.126), ('cardinality_error', 3.775), ('loss_ce_0', 0.284), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.118), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.126), ('cardinality_error_0', 3.775), ('loss_caption_0', 3.781), ('loss_caption', 3.778), ('total_loss', 18.585)]), +time/iter = 0.182, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.119), ('loss_bbox', 0.087), ('loss_giou', 0.239), ('loss_self_iou', 0.12), ('cardinality_error', 3.705), ('loss_ce_0', 0.289), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.087), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.121), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.682), ('loss_caption', 3.675), ('total_loss', 17.896)]), +time/iter = 0.180, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.227), ('loss_self_iou', 0.098), ('cardinality_error', 3.705), ('loss_ce_0', 0.292), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.668), ('loss_caption', 3.664), ('total_loss', 17.771)]), +time/iter = 0.181, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.126), ('loss_bbox', 0.078), ('loss_giou', 0.224), ('loss_self_iou', 0.1), ('cardinality_error', 3.784), ('loss_ce_0', 0.291), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.624), ('loss_caption', 3.629), ('total_loss', 17.579)]), +time/iter = 0.174, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.121), ('loss_bbox', 0.08), ('loss_giou', 0.218), ('loss_self_iou', 0.114), ('cardinality_error', 3.674), ('loss_ce_0', 0.287), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.08), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.674), ('loss_caption_0', 3.629), ('loss_caption', 3.629), ('total_loss', 17.526)]), +time/iter = 0.178, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.13), ('loss_bbox', 0.076), ('loss_giou', 0.22), ('loss_self_iou', 0.098), ('cardinality_error', 3.786), ('loss_ce_0', 0.293), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.786), ('loss_caption_0', 3.625), ('loss_caption', 3.622), ('total_loss', 17.555)]), +time/iter = 0.182, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.215), ('loss_self_iou', 0.097), ('cardinality_error', 3.746), ('loss_ce_0', 0.293), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), 
('loss_giou_0', 0.215), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.746), ('loss_caption_0', 3.58), ('loss_caption', 3.576), ('total_loss', 17.319)]), +time/iter = 0.179, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.129), ('loss_bbox', 0.078), ('loss_giou', 0.218), ('loss_self_iou', 0.108), ('cardinality_error', 3.754), ('loss_ce_0', 0.288), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.079), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.754), ('loss_caption_0', 3.546), ('loss_caption', 3.546), ('total_loss', 17.209)]), +time/iter = 0.184, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.078), ('loss_giou', 0.219), ('loss_self_iou', 0.1), ('cardinality_error', 3.685), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.219), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.685), ('loss_caption_0', 3.544), ('loss_caption', 3.54), ('total_loss', 17.2)]), +time/iter = 0.180, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10000 (epoch 0), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.22), ('loss_self_iou', 0.101), ('cardinality_error', 3.748), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.748), ('loss_caption_0', 3.582), ('loss_caption', 3.577), ('total_loss', 17.376)]), +time/iter = 0.180, bad_vid = 0.000 + +Validation results of iter 10009: +Bleu_1:0.15656016917085527 +Bleu_2:0.08210369852679855 +Bleu_3:0.042491746140277446 +Bleu_4:0.021149866989626908 +METEOR:0.08752782819459405 +ROUGE_L:0.1577032846084498 +CIDEr:0.2687260839927409 +Recall:0.4986985069085389 +Precision:0.548450952477792 +soda_c:0.045070258467165024 +para_Bleu_1:0.36987086578065714 +para_Bleu_2:0.1987998709052068 +para_Bleu_3:0.11671522868501899 +para_Bleu_4:0.07164097958462183 +para_METEOR:0.13901753612789455 +para_ROUGE_L:0.2826680559963382 +para_CIDEr:0.0956891322121665 + +overall score of iter 10009: 0.3063476479246829 + +Save model at iter 10009 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 10009 to checkpoint file. 
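The log rewrites model-last.pth after every validation pass and additionally promotes the checkpoint to "best" when the tracked criterion improves (criteria_for_best_ckpt = overall in the args above), which is consistent with the model-best.pth and model-last.pth LFS pointers earlier in this diff sharing a single sha256 oid. A minimal Python sketch of that save-last/promote-best pattern; save_checkpoint is a hypothetical helper for illustration, not the repository's actual code:

import shutil
import torch

def save_checkpoint(model, optimizer, iteration, overall_score, best_score, save_dir):
    # Refresh model-last.pth after each validation pass (hypothetical helper).
    last_path = f"{save_dir}/model-last.pth"
    torch.save({"model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "iteration": iteration,
                "overall_score": overall_score}, last_path)
    # Promote to model-best.pth only when the overall score improves; a
    # byte-for-byte copy is how both .pth files can end up with one LFS oid.
    if best_score is None or overall_score > best_score:
        shutil.copyfile(last_path, f"{save_dir}/model-best.pth")
        best_score = overall_score
    return best_score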
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 11000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.124), ('loss_bbox', 0.077), ('loss_giou', 0.217), ('loss_self_iou', 0.101), ('cardinality_error', 3.788), ('loss_ce_0', 0.292), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.788), ('loss_caption_0', 3.446), ('loss_caption', 3.443), ('total_loss', 16.802)]), +time/iter = 0.707, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.694), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.694), ('loss_caption_0', 3.427), ('loss_caption', 3.428), ('total_loss', 16.701)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.217), ('loss_self_iou', 0.107), ('cardinality_error', 3.689), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.689), ('loss_caption_0', 3.464), ('loss_caption', 3.461), ('total_loss', 16.871)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.21), ('loss_self_iou', 0.1), ('cardinality_error', 3.663), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.663), ('loss_caption_0', 3.414), ('loss_caption', 3.41), ('total_loss', 16.616)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.127), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.828), ('loss_ce_0', 0.296), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.828), ('loss_caption_0', 3.453), ('loss_caption', 3.453), ('total_loss', 16.836)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.073), ('loss_giou', 0.206), ('loss_self_iou', 0.105), ('cardinality_error', 3.687), ('loss_ce_0', 0.297), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.461), ('loss_caption', 3.462), ('total_loss', 16.803)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.127), ('loss_bbox', 0.073), ('loss_giou', 0.208), ('loss_self_iou', 0.102), ('cardinality_error', 3.791), ('loss_ce_0', 0.3), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.791), ('loss_caption_0', 3.469), ('loss_caption', 3.465), ('total_loss', 16.864)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.298), 
('loss_counter', 0.119), ('loss_bbox', 0.074), ('loss_giou', 0.205), ('loss_self_iou', 0.107), ('cardinality_error', 3.68), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.68), ('loss_caption_0', 3.478), ('loss_caption', 3.475), ('total_loss', 16.859)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.073), ('loss_giou', 0.207), ('loss_self_iou', 0.099), ('cardinality_error', 3.752), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.396), ('loss_caption', 3.396), ('total_loss', 16.585)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20000 (epoch 1), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.128), ('loss_bbox', 0.071), ('loss_giou', 0.208), ('loss_self_iou', 0.101), ('cardinality_error', 3.804), ('loss_ce_0', 0.304), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.804), ('loss_caption_0', 3.42), ('loss_caption', 3.419), ('total_loss', 16.684)]), +time/iter = 0.189, bad_vid = 0.000 + +Validation results of iter 20018: +Bleu_1:0.15965966113561106 +Bleu_2:0.08785069799970043 +Bleu_3:0.04739925348589703 +Bleu_4:0.02377096308421814 +METEOR:0.09062964515721111 +ROUGE_L:0.1652647774491388 +CIDEr:0.27366191469495676 +Recall:0.45131293652113946 +Precision:0.5379414954918249 +soda_c:0.04303682007432423 +para_Bleu_1:0.3640361416830845 +para_Bleu_2:0.1986476696673755 +para_Bleu_3:0.11814800235116821 +para_Bleu_4:0.07336184523852665 +para_METEOR:0.13911724177507803 +para_ROUGE_L:0.28211794880017504 +para_CIDEr:0.08634617454158834 + +overall score of iter 20018: 0.29882526155519307 + +Save model at iter 20018 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 21000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.202), ('loss_self_iou', 0.101), ('cardinality_error', 3.666), ('loss_ce_0', 0.299), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.666), ('loss_caption_0', 3.344), ('loss_caption', 3.335), ('total_loss', 16.294)]), +time/iter = 0.726, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.201), ('loss_self_iou', 0.109), ('cardinality_error', 3.752), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.302), ('loss_caption', 3.304), ('total_loss', 16.116)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.128), ('loss_bbox', 0.077), ('loss_giou', 0.208), ('loss_self_iou', 0.113), ('cardinality_error', 3.803), ('loss_ce_0', 0.299), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.112), ('cardinality_error_0', 3.803), ('loss_caption_0', 3.348), ('loss_caption', 3.34), ('total_loss', 16.363)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.122), ('loss_bbox', 0.076), ('loss_giou', 0.207), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.294), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 3.354), ('loss_caption', 3.351), ('total_loss', 16.364)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.213), ('loss_self_iou', 0.091), ('cardinality_error', 3.734), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.734), ('loss_caption_0', 3.372), ('loss_caption', 3.372), ('total_loss', 16.494)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.125), ('loss_bbox', 0.072), ('loss_giou', 0.203), ('loss_self_iou', 0.096), ('cardinality_error', 3.784), ('loss_ce_0', 0.299), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.334), ('loss_caption', 3.333), ('total_loss', 16.279)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 27000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.118), ('loss_bbox', 0.076), ('loss_giou', 0.203), ('loss_self_iou', 0.102), ('cardinality_error', 3.64), ('loss_ce_0', 0.291), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.64), ('loss_caption_0', 3.348), ('loss_caption', 3.345), ('total_loss', 16.287)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 28000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.292), 
('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.201), ('loss_self_iou', 0.095), ('cardinality_error', 3.774), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.774), ('loss_caption_0', 3.337), ('loss_caption', 3.333), ('total_loss', 16.249)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 29000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.12), ('loss_bbox', 0.075), ('loss_giou', 0.204), ('loss_self_iou', 0.1), ('cardinality_error', 3.755), ('loss_ce_0', 0.299), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.755), ('loss_caption_0', 3.315), ('loss_caption', 3.321), ('total_loss', 16.223)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 30000 (epoch 2), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.119), ('loss_bbox', 0.071), ('loss_giou', 0.195), ('loss_self_iou', 0.103), ('cardinality_error', 3.72), ('loss_ce_0', 0.302), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.72), ('loss_caption_0', 3.347), ('loss_caption', 3.349), ('total_loss', 16.283)]), +time/iter = 0.195, bad_vid = 0.000 + +Validation results of iter 30027: +Bleu_1:0.15440507165989542 +Bleu_2:0.08178273697953425 +Bleu_3:0.042600749568780155 +Bleu_4:0.02119123483046711 +METEOR:0.08563216148714695 +ROUGE_L:0.156809182143994 +CIDEr:0.25960752079137744 +Recall:0.5075951227720545 +Precision:0.571834112941489 +soda_c:0.048597974030683 +para_Bleu_1:0.3985431504573892 +para_Bleu_2:0.22415947108296613 +para_Bleu_3:0.1341003834690626 +para_Bleu_4:0.08312155143550452 +para_METEOR:0.1510085678983445 +para_ROUGE_L:0.2957598062989384 +para_CIDEr:0.12271570278513648 + +overall score of iter 30027: 0.3568458221189855 + +Save model at iter 30027 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 30027 to checkpoint file. 
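The per-iteration records ("ID ... iter N (epoch E), loss = OrderedDict([...]), time/iter = ..., bad_vid = ...") keep the same keys throughout the run, so the training curve can likewise be pulled straight out of the text. A minimal sketch under the same assumptions (hypothetical helper, exact record layout as above):

import re

# Hypothetical helper: extract (iteration, total_loss) pairs from records of
# the form "... iter N (epoch E), loss = OrderedDict([..., ('total_loss', X)])".
def parse_total_loss(log_text):
    record = re.compile(
        # the optional "+" tolerates the diff markers in the paste above
        r"iter (\d+) \(epoch \d+\),\s*\+?\s*loss = OrderedDict\(\["
        r".*?\('total_loss', ([\d.]+)\)\]\)",
        re.S,
    )
    return [(int(m.group(1)), float(m.group(2))) for m in record.finditer(log_text)]

The records above yield pairs such as (8000, 17.209) and (30000, 16.283), i.e. total_loss drifts downward across epochs. Note also that the first reading of each epoch reports time/iter around 0.7 s against the steady ~0.19 s, presumably because the validation and checkpointing pass at the epoch boundary falls inside that 1,000-iteration timing window.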
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 31000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.123), ('loss_bbox', 0.073), ('loss_giou', 0.202), ('loss_self_iou', 0.114), ('cardinality_error', 3.772), ('loss_ce_0', 0.296), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.772), ('loss_caption_0', 3.24), ('loss_caption', 3.242), ('total_loss', 15.889)]), +time/iter = 0.725, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 32000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.093), ('cardinality_error', 3.66), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.251), ('loss_caption', 3.248), ('total_loss', 15.869)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 33000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.787), ('loss_ce_0', 0.301), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.223), ('loss_caption', 3.225), ('total_loss', 15.81)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 34000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.121), ('loss_bbox', 0.076), ('loss_giou', 0.201), ('loss_self_iou', 0.107), ('cardinality_error', 3.719), ('loss_ce_0', 0.296), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.719), ('loss_caption_0', 3.21), ('loss_caption', 3.206), ('total_loss', 15.752)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 35000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.074), ('loss_giou', 0.201), ('loss_self_iou', 0.1), ('cardinality_error', 3.761), ('loss_ce_0', 0.304), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.761), ('loss_caption_0', 3.261), ('loss_caption', 3.267), ('total_loss', 16.006)]), +time/iter = 0.187, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 36000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.12), ('loss_bbox', 0.074), ('loss_giou', 0.202), ('loss_self_iou', 0.096), ('cardinality_error', 3.731), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.731), ('loss_caption_0', 3.322), ('loss_caption', 3.322), ('total_loss', 16.237)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 37000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.088), ('cardinality_error', 3.747), ('loss_ce_0', 0.306), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.276), ('loss_caption', 3.278), ('total_loss', 16.005)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 38000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 
0.122), ('loss_bbox', 0.073), ('loss_giou', 0.198), ('loss_self_iou', 0.096), ('cardinality_error', 3.747), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.26), ('loss_caption', 3.267), ('total_loss', 15.944)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 39000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.073), ('loss_giou', 0.194), ('loss_self_iou', 0.096), ('cardinality_error', 3.714), ('loss_ce_0', 0.3), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 3.29), ('loss_caption', 3.284), ('total_loss', 16.029)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 40000 (epoch 3), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.068), ('loss_giou', 0.187), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 3.255), ('loss_caption', 3.258), ('total_loss', 15.861)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 40036: +Bleu_1:0.16003947012491918 +Bleu_2:0.08640386650819816 +Bleu_3:0.045769192920880976 +Bleu_4:0.023139762266241797 +METEOR:0.08893476927946467 +ROUGE_L:0.16285119298911696 +CIDEr:0.27850058398714506 +Recall:0.4974410652224822 +Precision:0.571762083926507 +soda_c:0.04898353247531122 +para_Bleu_1:0.4116267700746525 +para_Bleu_2:0.23315066082372427 +para_Bleu_3:0.139785630195007 +para_Bleu_4:0.08689414164874545 +para_METEOR:0.15321412716959742 +para_ROUGE_L:0.2993749803089721 +para_CIDEr:0.12755194391496638 + +overall score of iter 40036: 0.3676602127333093 + +Save model at iter 40036 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 40036 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 41000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.196), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.159), ('loss_caption', 3.162), ('total_loss', 15.549)]), +time/iter = 0.733, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 42000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.698), ('loss_ce_0', 0.298), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.698), ('loss_caption_0', 3.191), ('loss_caption', 3.187), ('total_loss', 15.571)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 43000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.198), ('loss_self_iou', 0.089), ('cardinality_error', 3.785), ('loss_ce_0', 0.306), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.785), ('loss_caption_0', 3.247), ('loss_caption', 3.249), ('total_loss', 15.93)]), +time/iter = 0.195, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 44000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.072), ('loss_giou', 0.194), ('loss_self_iou', 0.104), ('cardinality_error', 3.727), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.727), ('loss_caption_0', 3.228), ('loss_caption', 3.227), ('total_loss', 15.794)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 45000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.194), ('loss_self_iou', 0.094), ('cardinality_error', 3.684), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.684), ('loss_caption_0', 3.138), ('loss_caption', 3.143), ('total_loss', 15.458)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 46000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.194), ('loss_self_iou', 0.107), ('cardinality_error', 3.8), ('loss_ce_0', 0.301), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.8), ('loss_caption_0', 3.198), ('loss_caption', 3.202), ('total_loss', 15.69)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 47000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.071), ('loss_giou', 0.193), ('loss_self_iou', 0.1), ('cardinality_error', 3.724), ('loss_ce_0', 0.302), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.166), ('loss_caption', 3.167), ('total_loss', 15.544)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 48000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 
0.126), ('loss_bbox', 0.074), ('loss_giou', 0.194), ('loss_self_iou', 0.1), ('cardinality_error', 3.779), ('loss_ce_0', 0.303), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.779), ('loss_caption_0', 3.197), ('loss_caption', 3.204), ('total_loss', 15.693)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 49000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.186), ('loss_self_iou', 0.103), ('cardinality_error', 3.67), ('loss_ce_0', 0.299), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.67), ('loss_caption_0', 3.197), ('loss_caption', 3.193), ('total_loss', 15.597)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 50000 (epoch 4), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.191), ('loss_self_iou', 0.1), ('cardinality_error', 3.769), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 3.195), ('loss_caption', 3.196), ('total_loss', 15.646)]), +time/iter = 0.193, bad_vid = 0.000 + +Validation results of iter 50045: +Bleu_1:0.1612752203314224 +Bleu_2:0.08712092952271142 +Bleu_3:0.04643407984417907 +Bleu_4:0.024237450149938583 +METEOR:0.0888552980469009 +ROUGE_L:0.16165678007821221 +CIDEr:0.28844655875134945 +Recall:0.5079771255793173 +Precision:0.5707494407158785 +soda_c:0.05143467092505771 +para_Bleu_1:0.425828341023263 +para_Bleu_2:0.2431293051387748 +para_Bleu_3:0.14662751878582 +para_Bleu_4:0.09131956416083617 +para_METEOR:0.15868276543147294 +para_ROUGE_L:0.30762031965083425 +para_CIDEr:0.1438790695271004 + +overall score of iter 50045: 0.39388139911940956 + +Save model at iter 50045 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 50045 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 51000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.119), ('loss_bbox', 0.072), ('loss_giou', 0.19), ('loss_self_iou', 0.1), ('cardinality_error', 3.708), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.708), ('loss_caption_0', 3.123), ('loss_caption', 3.122), ('total_loss', 15.345)]), +time/iter = 0.739, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 52000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.195), ('loss_self_iou', 0.091), ('cardinality_error', 3.787), ('loss_ce_0', 0.302), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.08), ('loss_caption', 3.08), ('total_loss', 15.224)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 53000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.192), ('loss_self_iou', 0.101), ('cardinality_error', 3.688), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.688), ('loss_caption_0', 3.121), ('loss_caption', 3.125), ('total_loss', 15.366)]), +time/iter = 0.196, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 54000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.66), ('loss_ce_0', 0.303), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.151), ('loss_caption', 3.158), ('total_loss', 15.44)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 55000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.759), ('loss_ce_0', 0.314), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.759), ('loss_caption_0', 3.137), ('loss_caption', 3.138), ('total_loss', 15.427)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 56000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.7), ('loss_ce_0', 0.303), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.7), ('loss_caption_0', 3.128), ('loss_caption', 3.132), ('total_loss', 15.353)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 57000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.125), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.833), ('loss_ce_0', 0.308), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.833), ('loss_caption_0', 3.157), ('loss_caption', 3.154), ('total_loss', 15.516)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 58000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), 
('loss_bbox', 0.072), ('loss_giou', 0.192), ('loss_self_iou', 0.099), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.092), ('loss_caption', 3.088), ('total_loss', 15.209)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 59000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.092), ('cardinality_error', 3.806), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.806), ('loss_caption_0', 3.204), ('loss_caption', 3.204), ('total_loss', 15.668)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 60000 (epoch 5), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.73), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.185), ('loss_caption', 3.179), ('total_loss', 15.62)]), +time/iter = 0.192, bad_vid = 0.000 + +Validation results of iter 60054: +Bleu_1:0.16203040821313286 +Bleu_2:0.087418866671477 +Bleu_3:0.04641401855891123 +Bleu_4:0.023872355329811287 +METEOR:0.08736154709181514 +ROUGE_L:0.16095171754962678 +CIDEr:0.3019460931650574 +Recall:0.5237442505746305 +Precision:0.5691986983933232 +soda_c:0.05366939846142926 +para_Bleu_1:0.4285515683378188 +para_Bleu_2:0.24896313523930838 +para_Bleu_3:0.15083849533584295 +para_Bleu_4:0.09425440122753082 +para_METEOR:0.15418242275887206 +para_ROUGE_L:0.3037081433191389 +para_CIDEr:0.16822639157343386 + +overall score of iter 60054: 0.41666321555983676 + +Save model at iter 60054 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 60054 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 61000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.687), ('loss_ce_0', 0.303), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.025), ('loss_caption', 3.031), ('total_loss', 14.914)]), +time/iter = 0.715, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 62000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.125), ('loss_bbox', 0.068), ('loss_giou', 0.192), ('loss_self_iou', 0.088), ('cardinality_error', 3.809), ('loss_ce_0', 0.304), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.809), ('loss_caption_0', 3.067), ('loss_caption', 3.064), ('total_loss', 15.147)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 63000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.072), ('loss_giou', 0.189), ('loss_self_iou', 0.102), ('cardinality_error', 3.636), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.636), ('loss_caption_0', 3.09), ('loss_caption', 3.083), ('total_loss', 15.188)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 64000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.185), ('loss_self_iou', 0.105), ('cardinality_error', 3.738), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.738), ('loss_caption_0', 3.09), ('loss_caption', 3.088), ('total_loss', 15.193)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 65000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.191), ('loss_self_iou', 0.094), ('cardinality_error', 3.735), ('loss_ce_0', 0.304), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.735), ('loss_caption_0', 3.087), ('loss_caption', 3.083), ('total_loss', 15.203)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 66000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.121), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.095), ('cardinality_error', 3.753), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.093), ('loss_caption', 3.093), ('total_loss', 15.235)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 67000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.189), ('loss_self_iou', 0.099), ('cardinality_error', 3.781), ('loss_ce_0', 0.299), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.781), ('loss_caption_0', 3.104), ('loss_caption', 3.095), ('total_loss', 15.24)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 68000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.3), 
('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.702), ('loss_ce_0', 0.3), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.702), ('loss_caption_0', 3.092), ('loss_caption', 3.087), ('total_loss', 15.171)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 69000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.116), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.087), ('cardinality_error', 3.705), ('loss_ce_0', 0.303), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.087), ('loss_caption', 3.084), ('total_loss', 15.154)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 70000 (epoch 6), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.119), ('loss_bbox', 0.07), ('loss_giou', 0.188), ('loss_self_iou', 0.104), ('cardinality_error', 3.763), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.763), ('loss_caption_0', 3.137), ('loss_caption', 3.142), ('total_loss', 15.421)]), +time/iter = 0.201, bad_vid = 0.000 + +Validation results of iter 70063: +Bleu_1:0.17095715677415013 +Bleu_2:0.0951967897773989 +Bleu_3:0.05145074727592996 +Bleu_4:0.026686223548170303 +METEOR:0.09033289555302068 +ROUGE_L:0.16939818741017104 +CIDEr:0.33299543538258497 +Recall:0.5001550726802355 +Precision:0.5629321740898863 +soda_c:0.05378783144134501 +para_Bleu_1:0.44719474980697405 +para_Bleu_2:0.2615784516531111 +para_Bleu_3:0.15956746990786394 +para_Bleu_4:0.09983770060804388 +para_METEOR:0.15549284849496958 +para_ROUGE_L:0.30852597622578265 +para_CIDEr:0.18758102150887232 + +overall score of iter 70063: 0.4429115706118858 + +Save model at iter 70063 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 70063 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 71000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.115), ('loss_bbox', 0.067), ('loss_giou', 0.187), ('loss_self_iou', 0.091), ('cardinality_error', 3.724), ('loss_ce_0', 0.304), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.994), ('loss_caption', 2.994), ('total_loss', 14.812)]), +time/iter = 0.691, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 72000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.099), ('cardinality_error', 3.665), ('loss_ce_0', 0.296), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.995), ('loss_caption', 3.0), ('total_loss', 14.803)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 73000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.067), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.762), ('loss_ce_0', 0.302), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.762), ('loss_caption_0', 3.03), ('loss_caption', 3.034), ('total_loss', 14.924)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 74000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.181), ('loss_self_iou', 0.093), ('cardinality_error', 3.722), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.722), ('loss_caption_0', 3.061), ('loss_caption', 3.062), ('total_loss', 15.037)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 75000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.835), ('loss_caption_0', 3.102), ('loss_caption', 3.108), ('total_loss', 15.261)]), +time/iter = 0.195, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 76000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.19), ('loss_self_iou', 0.096), ('cardinality_error', 3.787), ('loss_ce_0', 0.305), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.055), ('loss_caption', 3.056), ('total_loss', 15.081)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 77000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.191), ('loss_self_iou', 0.101), ('cardinality_error', 3.753), ('loss_ce_0', 0.3), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.064), ('loss_caption', 3.063), ('total_loss', 15.105)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 78000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.303), 
('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.812), ('loss_ce_0', 0.302), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.812), ('loss_caption_0', 3.075), ('loss_caption', 3.081), ('total_loss', 15.186)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 79000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.119), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.712), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.712), ('loss_caption_0', 3.004), ('loss_caption', 3.004), ('total_loss', 14.833)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 80000 (epoch 7), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.639), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.639), ('loss_caption_0', 3.011), ('loss_caption', 3.021), ('total_loss', 14.846)]), +time/iter = 0.189, bad_vid = 0.000 + +Validation results of iter 80072: +Bleu_1:0.16525493799366836 +Bleu_2:0.09017429361474327 +Bleu_3:0.04843073565357156 +Bleu_4:0.025752141227780294 +METEOR:0.09042668571725655 +ROUGE_L:0.1657835735936403 +CIDEr:0.30766696683798356 +Recall:0.5070758476264831 +Precision:0.5698723815334497 +soda_c:0.05193286444599829 +para_Bleu_1:0.4299765573510605 +para_Bleu_2:0.24998607326423264 +para_Bleu_3:0.15168978606887273 +para_Bleu_4:0.09540463753102806 +para_METEOR:0.15913054274631774 +para_ROUGE_L:0.30821511076520103 +para_CIDEr:0.14655297481419807 + +overall score of iter 80072: 0.4010881550915439 + +Save model at iter 80072 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 81000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.098), ('cardinality_error', 3.664), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.972), ('loss_caption', 2.974), ('total_loss', 14.63)]), +time/iter = 0.723, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 82000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.098), ('cardinality_error', 3.692), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.914), ('loss_caption', 2.912), ('total_loss', 14.413)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 83000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.939), ('loss_caption', 2.933), ('total_loss', 14.562)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 84000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.119), ('loss_bbox', 0.066), ('loss_giou', 0.18), ('loss_self_iou', 0.086), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.086), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.964), ('loss_caption', 2.963), ('total_loss', 14.614)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 85000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.187), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.301), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 2.942), ('loss_caption', 2.945), ('total_loss', 14.596)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 86000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.067), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.989), ('loss_caption', 2.988), ('total_loss', 14.745)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 87000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.119), ('loss_bbox', 0.067), ('loss_giou', 0.178), ('loss_self_iou', 0.096), ('cardinality_error', 3.692), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.93), ('loss_caption', 2.931), ('total_loss', 14.465)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 88000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.299), 
('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.102), ('cardinality_error', 3.74), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.945), ('loss_caption', 2.939), ('total_loss', 14.538)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 89000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.096), ('cardinality_error', 3.911), ('loss_ce_0', 0.303), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.911), ('loss_caption_0', 2.981), ('loss_caption', 2.985), ('total_loss', 14.762)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 90000 (epoch 8), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.113), ('loss_bbox', 0.066), ('loss_giou', 0.174), ('loss_self_iou', 0.099), ('cardinality_error', 3.667), ('loss_ce_0', 0.3), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.946), ('loss_caption', 2.945), ('total_loss', 14.493)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 90081: +Bleu_1:0.1659435247550983 +Bleu_2:0.09010888064116455 +Bleu_3:0.04740925434645997 +Bleu_4:0.023810200153797586 +METEOR:0.0893691583245007 +ROUGE_L:0.16481267120708817 +CIDEr:0.3096929324572276 +Recall:0.5271698247293078 +Precision:0.5766981899532185 +soda_c:0.05637593299631936 +para_Bleu_1:0.4507795558374508 +para_Bleu_2:0.2668765313566654 +para_Bleu_3:0.16324000259413463 +para_Bleu_4:0.10292908422008885 +para_METEOR:0.163503434468027 +para_ROUGE_L:0.3141109355407807 +para_CIDEr:0.1830754815850521 + +overall score of iter 90081: 0.44950800027316795 + +Save model at iter 90081 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 90081 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 91000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.066), ('loss_giou', 0.179), ('loss_self_iou', 0.097), ('cardinality_error', 3.807), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.807), ('loss_caption_0', 2.916), ('loss_caption', 2.914), ('total_loss', 14.411)]), +time/iter = 0.724, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 92000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.121), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.093), ('cardinality_error', 3.784), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.784), ('loss_caption_0', 2.916), ('loss_caption', 2.915), ('total_loss', 14.422)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 93000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.117), ('loss_bbox', 0.065), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.806), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.806), ('loss_caption_0', 2.9), ('loss_caption', 2.905), ('total_loss', 14.377)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 94000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.109), ('loss_bbox', 0.068), ('loss_giou', 0.174), ('loss_self_iou', 0.105), ('cardinality_error', 3.616), ('loss_ce_0', 0.293), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.106), ('cardinality_error_0', 3.616), ('loss_caption_0', 2.912), ('loss_caption', 2.914), ('total_loss', 14.339)]), +time/iter = 0.187, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 95000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.12), ('loss_bbox', 0.066), ('loss_giou', 0.185), ('loss_self_iou', 0.093), ('cardinality_error', 3.805), ('loss_ce_0', 0.296), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.805), ('loss_caption_0', 2.938), ('loss_caption', 2.941), ('total_loss', 14.546)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 96000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.177), ('loss_self_iou', 0.103), ('cardinality_error', 3.684), ('loss_ce_0', 0.293), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.684), ('loss_caption_0', 2.928), ('loss_caption', 2.931), ('total_loss', 14.434)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 97000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.111), ('loss_bbox', 0.066), ('loss_giou', 0.184), ('loss_self_iou', 0.095), ('cardinality_error', 3.693), ('loss_ce_0', 0.298), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.693), ('loss_caption_0', 2.902), ('loss_caption', 2.903), ('total_loss', 14.392)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 98000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.296), 
('loss_counter', 0.115), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.089), ('cardinality_error', 3.738), ('loss_ce_0', 0.298), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.738), ('loss_caption_0', 2.896), ('loss_caption', 2.902), ('total_loss', 14.361)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 99000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.115), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.095), ('cardinality_error', 3.702), ('loss_ce_0', 0.296), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.702), ('loss_caption_0', 2.956), ('loss_caption', 2.956), ('total_loss', 14.525)]), +time/iter = 0.195, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 100000 (epoch 9), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.092), ('cardinality_error', 3.751), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.751), ('loss_caption_0', 2.932), ('loss_caption', 2.932), ('total_loss', 14.453)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 100090: +Bleu_1:0.16664911544364056 +Bleu_2:0.09023295213839283 +Bleu_3:0.04763940550902772 +Bleu_4:0.02409205514859969 +METEOR:0.0878588871148787 +ROUGE_L:0.16401896184386325 +CIDEr:0.31947446694949533 +Recall:0.5282742157284517 +Precision:0.5750796556165633 +soda_c:0.05745241491068406 +para_Bleu_1:0.46204429574393835 +para_Bleu_2:0.2749900961045832 +para_Bleu_3:0.1683879565471281 +para_Bleu_4:0.10624339593597942 +para_METEOR:0.16245439213508253 +para_ROUGE_L:0.3162965936511474 +para_CIDEr:0.20803178964320856 + +overall score of iter 100090: 0.4767295777142705 + +Save model at iter 100090 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 100090 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 101000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.849), ('loss_caption', 2.847), ('total_loss', 14.064)]), +time/iter = 0.713, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 102000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.116), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.093), ('cardinality_error', 3.695), ('loss_ce_0', 0.293), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.85), ('loss_caption', 2.848), ('total_loss', 14.087)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 103000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.115), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.724), ('loss_ce_0', 0.293), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.846), ('loss_caption', 2.854), ('total_loss', 14.092)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 104000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.097), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.916), ('loss_caption', 2.913), ('total_loss', 14.362)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 105000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.907), ('loss_caption', 2.902), ('total_loss', 14.342)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 106000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.11), ('cardinality_error', 3.775), ('loss_ce_0', 0.293), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.876), ('loss_caption', 2.875), ('total_loss', 14.264)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 107000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.178), ('loss_self_iou', 0.099), ('cardinality_error', 3.743), ('loss_ce_0', 0.291), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.91), ('loss_caption', 2.909), ('total_loss', 14.358)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 108000 (epoch 10), +loss = OrderedDict([('loss_ce', 
0.295), ('loss_counter', 0.118), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.1), ('cardinality_error', 3.81), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.81), ('loss_caption_0', 2.928), ('loss_caption', 2.93), ('total_loss', 14.446)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 109000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.118), ('loss_bbox', 0.063), ('loss_giou', 0.178), ('loss_self_iou', 0.091), ('cardinality_error', 3.78), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.78), ('loss_caption_0', 2.916), ('loss_caption', 2.912), ('total_loss', 14.396)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 110000 (epoch 10), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.087), ('cardinality_error', 3.72), ('loss_ce_0', 0.297), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.948), ('loss_caption', 2.948), ('total_loss', 14.539)]), +time/iter = 0.196, bad_vid = 0.000 + +Validation results of iter 110099: +Bleu_1:0.1671778590456048 +Bleu_2:0.09077014613023152 +Bleu_3:0.0476684747303012 +Bleu_4:0.02445564298599047 +METEOR:0.08933235383587503 +ROUGE_L:0.1654660162888944 +CIDEr:0.31886265111118334 +Recall:0.5314017615268335 +Precision:0.5831469052945512 +soda_c:0.05853263249839839 +para_Bleu_1:0.46544090189732323 +para_Bleu_2:0.2789325258737778 +para_Bleu_3:0.17172911957785325 +para_Bleu_4:0.10903514181091935 +para_METEOR:0.16550159188298816 +para_ROUGE_L:0.3181118223429575 +para_CIDEr:0.2056618808195008 + +overall score of iter 110099: 0.4801986145134083 + +Save model at iter 110099 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 110099 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 111000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.095), ('cardinality_error', 3.718), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.718), ('loss_caption_0', 2.867), ('loss_caption', 2.869), ('total_loss', 14.14)]), +time/iter = 0.727, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 112000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.725), ('loss_ce_0', 0.289), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.844), ('loss_caption', 2.842), ('total_loss', 14.015)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 113000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.734), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.734), ('loss_caption_0', 2.837), ('loss_caption', 2.834), ('total_loss', 13.981)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 114000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.739), ('loss_ce_0', 0.285), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.739), ('loss_caption_0', 2.855), ('loss_caption', 2.857), ('total_loss', 14.084)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 115000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.175), ('loss_self_iou', 0.092), ('cardinality_error', 3.74), ('loss_ce_0', 0.284), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.823), ('loss_caption', 2.824), ('total_loss', 13.959)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 116000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.177), ('loss_self_iou', 0.088), ('cardinality_error', 3.753), ('loss_ce_0', 0.288), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.846), ('loss_caption', 2.843), ('total_loss', 14.073)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 117000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.755), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.804), ('loss_caption', 2.81), ('total_loss', 13.896)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 118000 (epoch 11), +loss = 
OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.093), ('cardinality_error', 3.715), ('loss_ce_0', 0.285), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.715), ('loss_caption_0', 2.863), ('loss_caption', 2.866), ('total_loss', 14.129)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 119000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.064), ('loss_giou', 0.176), ('loss_self_iou', 0.098), ('cardinality_error', 3.735), ('loss_ce_0', 0.287), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.735), ('loss_caption_0', 2.844), ('loss_caption', 2.843), ('total_loss', 14.061)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 120000 (epoch 11), +loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.175), ('loss_self_iou', 0.101), ('cardinality_error', 3.755), ('loss_ce_0', 0.285), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.868), ('loss_caption', 2.878), ('total_loss', 14.168)]), +time/iter = 0.190, bad_vid = 0.000 + +Validation results of iter 120108: +Bleu_1:0.16560019346009094 +Bleu_2:0.08934946581658681 +Bleu_3:0.04692472826903507 +Bleu_4:0.023331060597699706 +METEOR:0.08861943572471001 +ROUGE_L:0.16392659155605854 +CIDEr:0.31177527957257306 +Recall:0.5248955646301546 +Precision:0.5713061826316813 +soda_c:0.056694173808073595 +para_Bleu_1:0.45551540477127933 +para_Bleu_2:0.2725270289009415 +para_Bleu_3:0.16731081427102573 +para_Bleu_4:0.10555679460767188 +para_METEOR:0.1665724805603667 +para_ROUGE_L:0.31619749898051375 +para_CIDEr:0.19719071969736374 + +overall score of iter 120108: 0.4693199948654023 + +Save model at iter 120108 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 121000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.095), ('cardinality_error', 3.691), ('loss_ce_0', 0.284), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.809), ('loss_caption', 2.808), ('total_loss', 13.835)]), +time/iter = 0.727, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 122000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.17), ('loss_self_iou', 0.093), ('cardinality_error', 3.706), ('loss_ce_0', 0.281), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.706), ('loss_caption_0', 2.811), ('loss_caption', 2.814), ('total_loss', 13.867)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 123000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.691), ('loss_ce_0', 0.281), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.789), ('loss_caption', 2.797), ('total_loss', 13.808)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 124000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.063), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.76), ('loss_ce_0', 0.281), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.76), ('loss_caption_0', 2.839), ('loss_caption', 2.842), ('total_loss', 13.984)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 125000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.763), ('loss_ce_0', 0.282), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.763), ('loss_caption_0', 2.81), ('loss_caption', 2.815), ('total_loss', 13.898)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 126000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.095), ('cardinality_error', 3.717), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.717), ('loss_caption_0', 2.789), ('loss_caption', 2.787), ('total_loss', 13.835)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 127000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.867), ('loss_caption', 2.871), ('total_loss', 14.097)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 128000 (epoch 12), +loss = 
OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.113), ('loss_bbox', 0.063), ('loss_giou', 0.173), ('loss_self_iou', 0.092), ('cardinality_error', 3.793), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.793), ('loss_caption_0', 2.868), ('loss_caption', 2.863), ('total_loss', 14.111)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 129000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.1), ('cardinality_error', 3.686), ('loss_ce_0', 0.283), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.686), ('loss_caption_0', 2.812), ('loss_caption', 2.813), ('total_loss', 13.903)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 130000 (epoch 12), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.772), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.772), ('loss_caption_0', 2.86), ('loss_caption', 2.861), ('total_loss', 14.105)]), +time/iter = 0.190, bad_vid = 0.000 + +Validation results of iter 130117: +Bleu_1:0.16778675341331784 +Bleu_2:0.09082555766488616 +Bleu_3:0.047445681271689716 +Bleu_4:0.02375280793420285 +METEOR:0.08883520478698428 +ROUGE_L:0.16531435721130755 +CIDEr:0.31778343902267087 +Recall:0.5273619026669621 +Precision:0.5698181479221706 +soda_c:0.05753856798988932 +para_Bleu_1:0.4610381779339771 +para_Bleu_2:0.2761144617772928 +para_Bleu_3:0.16915034097081671 +para_Bleu_4:0.10654029953240575 +para_METEOR:0.16638305166981465 +para_ROUGE_L:0.31710573495570465 +para_CIDEr:0.19601570682645908 + +overall score of iter 130117: 0.46893905802867947 + +Save model at iter 130117 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
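The per-iteration lines serialize the loss breakdown as a Python `OrderedDict` repr, so the tuple list inside it round-trips through `ast.literal_eval`. A hedged sketch for recovering the loss curves from such a log (the regex mirrors the line format in this dump and is an assumption, not project code):

```python
import ast
import re

# "iter 123000 (epoch 12), loss = OrderedDict([('loss_ce', 0.28), ...])"
LOSS_RE = re.compile(r"iter (\d+) \(epoch \d+\).*?OrderedDict\((\[.*?\])\)", re.S)

def iter_losses(log_text):
    """Yield (iteration, {loss_name: value}) for every logged training step."""
    for m in LOSS_RE.finditer(log_text):
        yield int(m.group(1)), dict(ast.literal_eval(m.group(2)))

# e.g. the total-loss trend across the run:
# pairs = [(it, d["total_loss"]) for it, d in iter_losses(text)]
```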
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 131000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.062), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.75), ('loss_ce_0', 0.279), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.75), ('loss_caption_0', 2.817), ('loss_caption', 2.826), ('total_loss', 13.897)]), +time/iter = 0.734, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 132000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.109), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.089), ('cardinality_error', 3.814), ('loss_ce_0', 0.274), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.814), ('loss_caption_0', 2.778), ('loss_caption', 2.776), ('total_loss', 13.726)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 133000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.095), ('cardinality_error', 3.773), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.773), ('loss_caption_0', 2.843), ('loss_caption', 2.843), ('total_loss', 13.999)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 134000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.108), ('loss_bbox', 0.065), ('loss_giou', 0.171), ('loss_self_iou', 0.101), ('cardinality_error', 3.743), ('loss_ce_0', 0.276), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.786), ('loss_caption', 2.787), ('total_loss', 13.756)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 135000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.794), ('loss_ce_0', 0.281), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.785), ('loss_caption', 2.784), ('total_loss', 13.759)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 136000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.065), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.653), ('loss_ce_0', 0.279), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.828), ('loss_caption', 2.834), ('total_loss', 13.919)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 137000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.105), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.099), ('cardinality_error', 3.654), ('loss_ce_0', 0.281), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.654), ('loss_caption_0', 2.79), ('loss_caption', 2.799), ('total_loss', 13.806)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 138000 (epoch 13), +loss = 
OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.171), ('loss_self_iou', 0.095), ('cardinality_error', 3.714), ('loss_ce_0', 0.28), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.835), ('loss_caption', 2.828), ('total_loss', 13.945)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 139000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.098), ('cardinality_error', 3.813), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.83), ('loss_caption', 2.828), ('total_loss', 13.924)]), +time/iter = 0.186, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 140000 (epoch 13), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.09), ('cardinality_error', 3.664), ('loss_ce_0', 0.28), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.821), ('loss_caption', 2.823), ('total_loss', 13.905)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 140126: +Bleu_1:0.16683698969676453 +Bleu_2:0.09036855967772307 +Bleu_3:0.047484441130632896 +Bleu_4:0.023876859658376735 +METEOR:0.08814626862844692 +ROUGE_L:0.16473003568483396 +CIDEr:0.3189568758512915 +Recall:0.5281546209817979 +Precision:0.5704333604501349 +soda_c:0.057417105431783064 +para_Bleu_1:0.4580706340663244 +para_Bleu_2:0.27372623489326064 +para_Bleu_3:0.16745128920972313 +para_Bleu_4:0.10550306643408856 +para_METEOR:0.16656454278617736 +para_ROUGE_L:0.31631873012989425 +para_CIDEr:0.19724321819057877 + +overall score of iter 140126: 0.46931082741084473 + +Save model at iter 140126 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 141000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.171), ('loss_self_iou', 0.106), ('cardinality_error', 3.774), ('loss_ce_0', 0.27), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.75), ('loss_caption', 2.748), ('total_loss', 13.572)]), +time/iter = 0.739, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 142000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.173), ('loss_self_iou', 0.091), ('cardinality_error', 3.797), ('loss_ce_0', 0.272), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.797), ('loss_caption_0', 2.72), ('loss_caption', 2.722), ('total_loss', 13.492)]), +time/iter = 0.186, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 143000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.162), ('loss_self_iou', 0.095), ('cardinality_error', 3.637), ('loss_ce_0', 0.268), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.637), ('loss_caption_0', 2.782), ('loss_caption', 2.782), ('total_loss', 13.626)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 144000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.112), ('loss_bbox', 0.062), ('loss_giou', 0.172), ('loss_self_iou', 0.094), ('cardinality_error', 3.831), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.831), ('loss_caption_0', 2.793), ('loss_caption', 2.79), ('total_loss', 13.773)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 145000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.101), ('loss_bbox', 0.061), ('loss_giou', 0.16), ('loss_self_iou', 0.093), ('cardinality_error', 3.665), ('loss_ce_0', 0.273), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.168), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.762), ('loss_caption', 2.767), ('total_loss', 13.554)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 146000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.725), ('loss_ce_0', 0.276), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.813), ('loss_caption', 2.813), ('total_loss', 13.811)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 147000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.097), ('cardinality_error', 3.714), ('loss_ce_0', 0.273), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.747), ('loss_caption', 2.745), ('total_loss', 13.578)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 148000 (epoch 14), +loss = OrderedDict([('loss_ce', 
0.271), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.728), ('loss_ce_0', 0.274), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.728), ('loss_caption_0', 2.843), ('loss_caption', 2.84), ('total_loss', 13.944)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 149000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.799), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.836), ('loss_caption', 2.836), ('total_loss', 13.926)]), +time/iter = 0.196, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 150000 (epoch 14), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.087), ('cardinality_error', 3.703), ('loss_ce_0', 0.272), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.703), ('loss_caption_0', 2.806), ('loss_caption', 2.806), ('total_loss', 13.795)]), +time/iter = 0.193, bad_vid = 0.000 + +Validation results of iter 150135: +Bleu_1:0.16662144072598145 +Bleu_2:0.08988753231411394 +Bleu_3:0.04690847145308288 +Bleu_4:0.023224274927987735 +METEOR:0.08725158341768323 +ROUGE_L:0.16364893754496343 +CIDEr:0.32028824475030926 +Recall:0.5260420675803493 +Precision:0.5630584367161506 +soda_c:0.057565785652999135 +para_Bleu_1:0.46764194087144684 +para_Bleu_2:0.2801629240374498 +para_Bleu_3:0.1713033186995987 +para_Bleu_4:0.10750827268624512 +para_METEOR:0.16742715934059368 +para_ROUGE_L:0.31858424377772926 +para_CIDEr:0.2089956210595351 + +overall score of iter 150135: 0.4839310530863739 + +Save model at iter 150135 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 150135 to checkpoint file. 
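Note the save pattern visible here: `model-last.pth` is overwritten at every evaluation, while a "Save Best-model" line only appears when the overall score beats the running maximum (0.4839 at iter 150135 surpasses the 0.4802 from iter 110099; the three evaluations in between did not). A sketch of that bookkeeping, with the best-checkpoint filename assumed since the log never names it:

```python
import shutil
from pathlib import Path

def save_checkpoints(save_dir, score, best_score):
    """Overwrite the rolling checkpoint; promote it only on a new best score."""
    last = Path(save_dir) / "model-last.pth"
    # torch.save(model.state_dict(), last) would happen here in the real loop.
    if score > best_score:
        shutil.copyfile(last, Path(save_dir) / "model-best.pth")  # assumed name
        return score
    return best_score
```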
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 151000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.101), ('loss_bbox', 0.063), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.645), ('loss_ce_0', 0.266), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.645), ('loss_caption_0', 2.762), ('loss_caption', 2.759), ('total_loss', 13.537)]), +time/iter = 0.737, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 152000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.087), ('cardinality_error', 3.722), ('loss_ce_0', 0.269), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.762), ('loss_caption', 2.766), ('total_loss', 13.59)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 153000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.111), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.083), ('cardinality_error', 3.813), ('loss_ce_0', 0.267), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.085), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.777), ('loss_caption', 2.778), ('total_loss', 13.663)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 154000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.106), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.769), ('loss_ce_0', 0.272), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.787), ('loss_caption', 2.787), ('total_loss', 13.717)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 155000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.09), ('cardinality_error', 3.714), ('loss_ce_0', 0.267), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.758), ('loss_caption', 2.76), ('total_loss', 13.593)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 156000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.167), ('loss_self_iou', 0.102), ('cardinality_error', 3.675), ('loss_ce_0', 0.269), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.675), ('loss_caption_0', 2.741), ('loss_caption', 2.742), ('total_loss', 13.504)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 157000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.267), ('loss_counter', 0.104), ('loss_bbox', 0.065), ('loss_giou', 0.167), ('loss_self_iou', 0.103), ('cardinality_error', 3.722), ('loss_ce_0', 0.268), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.777), ('loss_caption', 2.783), ('total_loss', 13.668)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 158000 (epoch 15), +loss = 
OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.099), ('cardinality_error', 3.758), ('loss_ce_0', 0.27), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.758), ('loss_caption_0', 2.815), ('loss_caption', 2.817), ('total_loss', 13.789)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 159000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.729), ('loss_ce_0', 0.275), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.783), ('loss_caption', 2.785), ('total_loss', 13.721)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 160000 (epoch 15), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.109), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.098), ('cardinality_error', 3.816), ('loss_ce_0', 0.271), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.78), ('loss_caption', 2.784), ('total_loss', 13.686)]), +time/iter = 0.196, bad_vid = 0.000 + +Validation results of iter 160144: +Bleu_1:0.16754398447821903 +Bleu_2:0.08978801866243748 +Bleu_3:0.046077601805781236 +Bleu_4:0.02215727819941335 +METEOR:0.08650894641812401 +ROUGE_L:0.16425299709373153 +CIDEr:0.3192637628790779 +Recall:0.5308598805776927 +Precision:0.5705477594739302 +soda_c:0.059035206979637336 +para_Bleu_1:0.4722129873397206 +para_Bleu_2:0.2843271953295457 +para_Bleu_3:0.17433620623201318 +para_Bleu_4:0.10943737200004257 +para_METEOR:0.16524483023272712 +para_ROUGE_L:0.3180351825656492 +para_CIDEr:0.2139382514781602 + +overall score of iter 160144: 0.4886204537109299 + +Save model at iter 160144 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 160144 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 161000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.103), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.695), ('loss_ce_0', 0.263), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.766), ('loss_caption', 2.768), ('total_loss', 13.553)]), +time/iter = 0.749, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 162000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.694), ('loss_ce_0', 0.266), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.768), ('loss_caption', 2.764), ('total_loss', 13.573)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 163000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.173), ('loss_self_iou', 0.097), ('cardinality_error', 3.769), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.765), ('loss_caption', 2.766), ('total_loss', 13.63)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 164000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.11), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.774), ('loss_ce_0', 0.269), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.772), ('loss_caption', 2.776), ('total_loss', 13.625)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 165000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.102), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.699), ('loss_ce_0', 0.267), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.711), ('loss_caption', 2.716), ('total_loss', 13.368)]), +time/iter = 0.187, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 166000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.105), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.72), ('loss_ce_0', 0.268), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.754), ('loss_caption', 2.755), ('total_loss', 13.534)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 167000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.095), ('cardinality_error', 3.712), ('loss_ce_0', 0.266), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.712), ('loss_caption_0', 2.771), ('loss_caption', 2.772), ('total_loss', 13.617)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 168000 (epoch 16), +loss = 
OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.09), ('cardinality_error', 3.816), ('loss_ce_0', 0.269), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.814), ('loss_caption', 2.82), ('total_loss', 13.826)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 169000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.106), ('cardinality_error', 3.697), ('loss_ce_0', 0.261), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.697), ('loss_caption_0', 2.769), ('loss_caption', 2.775), ('total_loss', 13.598)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 170000 (epoch 16), +loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.105), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.093), ('cardinality_error', 3.799), ('loss_ce_0', 0.272), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.794), ('loss_caption', 2.798), ('total_loss', 13.727)]), +time/iter = 0.191, bad_vid = 0.000 + +Validation results of iter 170153: +Bleu_1:0.16584280243722227 +Bleu_2:0.08889969905794425 +Bleu_3:0.04569298286173284 +Bleu_4:0.021992960199339176 +METEOR:0.08570833880397384 +ROUGE_L:0.16234979503724006 +CIDEr:0.3170462149966731 +Recall:0.5273397281824633 +Precision:0.5648989898989865 +soda_c:0.058539462474976364 +para_Bleu_1:0.4735378044184376 +para_Bleu_2:0.2855599966961999 +para_Bleu_3:0.17485842077678387 +para_Bleu_4:0.10998333079246524 +para_METEOR:0.16580782598840993 +para_ROUGE_L:0.3184105968751349 +para_CIDEr:0.2144083270960459 + +overall score of iter 170153: 0.4901994838769211 + +Save model at iter 170153 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 170153 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 171000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.161), ('loss_self_iou', 0.094), ('cardinality_error', 3.694), ('loss_ce_0', 0.261), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.772), ('loss_caption', 2.77), ('total_loss', 13.544)]), +time/iter = 0.745, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 172000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.096), ('cardinality_error', 3.667), ('loss_ce_0', 0.262), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.741), ('loss_caption', 2.743), ('total_loss', 13.47)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 173000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.104), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.09), ('cardinality_error', 3.753), ('loss_ce_0', 0.261), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.786), ('loss_caption', 2.785), ('total_loss', 13.646)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 174000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.107), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.094), ('cardinality_error', 3.832), ('loss_ce_0', 0.261), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.832), ('loss_caption_0', 2.733), ('loss_caption', 2.738), ('total_loss', 13.457)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 175000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.163), ('loss_self_iou', 0.098), ('cardinality_error', 3.731), ('loss_ce_0', 0.259), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.062), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.731), ('loss_caption_0', 2.745), ('loss_caption', 2.744), ('total_loss', 13.454)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 176000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.095), ('cardinality_error', 3.795), ('loss_ce_0', 0.264), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.795), ('loss_caption_0', 2.761), ('loss_caption', 2.77), ('total_loss', 13.575)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 177000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.161), ('loss_self_iou', 0.096), ('cardinality_error', 3.652), ('loss_ce_0', 0.261), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.745), ('total_loss', 13.43)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 178000 (epoch 17), +loss = OrderedDict([('loss_ce', 
0.255), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.103), ('cardinality_error', 3.664), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.682), ('loss_caption', 2.68), ('total_loss', 13.211)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 179000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.105), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.09), ('cardinality_error', 3.825), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.825), ('loss_caption_0', 2.788), ('loss_caption', 2.796), ('total_loss', 13.671)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 180000 (epoch 17), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.781), ('loss_caption', 2.775), ('total_loss', 13.608)]), +time/iter = 0.192, bad_vid = 0.000 + +Validation results of iter 180162: +Bleu_1:0.16720622564646215 +Bleu_2:0.08946643461131876 +Bleu_3:0.04568137095423273 +Bleu_4:0.022039722503534608 +METEOR:0.08588931176535387 +ROUGE_L:0.16315869782389542 +CIDEr:0.32099741016990446 +Recall:0.5265047853249455 +Precision:0.5647345942647923 +soda_c:0.05847424883094643 +para_Bleu_1:0.47508155945278135 +para_Bleu_2:0.2858233856765029 +para_Bleu_3:0.17499503512152859 +para_Bleu_4:0.11002968407978216 +para_METEOR:0.16541373751181562 +para_ROUGE_L:0.3190110890037882 +para_CIDEr:0.21421557986951392 + +overall score of iter 180162: 0.4896590014611117 + +Save model at iter 180162 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 181000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.781), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.781), ('loss_caption_0', 2.743), ('loss_caption', 2.746), ('total_loss', 13.452)]), +time/iter = 0.750, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 182000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.1), ('cardinality_error', 3.726), ('loss_ce_0', 0.26), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.748), ('loss_caption', 2.746), ('total_loss', 13.472)]), +time/iter = 0.189, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 183000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.722), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.729), ('loss_caption', 2.734), ('total_loss', 13.405)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 184000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.726), ('loss_ce_0', 0.257), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.783), ('loss_caption', 2.787), ('total_loss', 13.591)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 185000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.098), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.087), ('cardinality_error', 3.667), ('loss_ce_0', 0.26), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.718), ('loss_caption', 2.716), ('total_loss', 13.354)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 186000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.776), ('loss_ce_0', 0.259), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.776), ('loss_caption_0', 2.75), ('loss_caption', 2.75), ('total_loss', 13.494)]), +time/iter = 0.194, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 187000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.803), ('loss_ce_0', 0.264), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.803), ('loss_caption_0', 2.788), ('loss_caption', 2.791), ('total_loss', 13.678)]), +time/iter = 0.198, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 188000 (epoch 18), +loss = OrderedDict([('loss_ce', 
0.253), ('loss_counter', 0.1), ('loss_bbox', 0.062), ('loss_giou', 0.163), ('loss_self_iou', 0.091), ('cardinality_error', 3.71), ('loss_ce_0', 0.259), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.71), ('loss_caption_0', 2.745), ('loss_caption', 2.743), ('total_loss', 13.444)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 189000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.165), ('loss_self_iou', 0.1), ('cardinality_error', 3.748), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.748), ('loss_caption_0', 2.751), ('loss_caption', 2.753), ('total_loss', 13.484)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 190000 (epoch 18), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.104), ('loss_bbox', 0.06), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 2.729), ('loss_caption', 2.73), ('total_loss', 13.395)]), +time/iter = 0.189, bad_vid = 0.000 + +Validation results of iter 190171: +Bleu_1:0.1662475028889873 +Bleu_2:0.08895418147726737 +Bleu_3:0.04559170272578064 +Bleu_4:0.021869443641790748 +METEOR:0.0853620749347768 +ROUGE_L:0.16226693807975517 +CIDEr:0.3203697867996399 +Recall:0.5243080966273422 +Precision:0.5592002237136435 +soda_c:0.058066485957305666 +para_Bleu_1:0.47302383939773723 +para_Bleu_2:0.2848420020452884 +para_Bleu_3:0.17477626094199183 +para_Bleu_4:0.11005159892431456 +para_METEOR:0.16474042555391544 +para_ROUGE_L:0.31754161420686944 +para_CIDEr:0.2082818020277855 + +overall score of iter 190171: 0.4830738265060155 + +Save model at iter 190171 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 191000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.086), ('cardinality_error', 3.653), ('loss_ce_0', 0.257), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.754), ('loss_caption', 2.752), ('total_loss', 13.501)]), +time/iter = 0.755, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 192000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.1), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.094), ('cardinality_error', 3.767), ('loss_ce_0', 0.258), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.767), ('loss_caption_0', 2.717), ('loss_caption', 2.72), ('total_loss', 13.343)]), +time/iter = 0.188, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 193000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.106), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.093), ('cardinality_error', 3.847), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.847), ('loss_caption_0', 2.754), ('loss_caption', 2.759), ('total_loss', 13.499)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 194000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.165), ('loss_self_iou', 0.097), ('cardinality_error', 3.775), ('loss_ce_0', 0.262), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.769), ('loss_caption', 2.772), ('total_loss', 13.587)]), +time/iter = 0.192, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 195000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.794), ('loss_ce_0', 0.261), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.751), ('loss_caption', 2.751), ('total_loss', 13.506)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 196000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.095), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.1), ('cardinality_error', 3.652), ('loss_ce_0', 0.258), ('loss_counter_0', 0.095), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.735), ('total_loss', 13.403)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 197000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.091), ('cardinality_error', 3.759), ('loss_ce_0', 0.258), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.759), ('loss_caption_0', 2.74), ('loss_caption', 2.743), ('total_loss', 13.418)]), +time/iter = 0.191, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 198000 (epoch 19), +loss = OrderedDict([('loss_ce', 
0.249), ('loss_counter', 0.098), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.092), ('cardinality_error', 3.664), ('loss_ce_0', 0.255), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.718), ('loss_caption', 2.72), ('total_loss', 13.31)]), +time/iter = 0.190, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 199000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.101), ('cardinality_error', 3.736), ('loss_ce_0', 0.257), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.759), ('loss_caption', 2.76), ('total_loss', 13.502)]), +time/iter = 0.193, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 200000 (epoch 19), +loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.159), ('loss_self_iou', 0.098), ('cardinality_error', 3.701), ('loss_ce_0', 0.259), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.701), ('loss_caption_0', 2.766), ('loss_caption', 2.771), ('total_loss', 13.518)]), +time/iter = 0.190, bad_vid = 0.000 + +Validation results of iter 200180: +Bleu_1:0.16600244771432068 +Bleu_2:0.08859363359362551 +Bleu_3:0.045174799285766926 +Bleu_4:0.021453706973694267 +METEOR:0.08469975853590762 +ROUGE_L:0.1615333099598977 +CIDEr:0.3178372173219055 +Recall:0.5270524681293403 +Precision:0.5612365263371945 +soda_c:0.05852570981425518 +para_Bleu_1:0.47641872729084495 +para_Bleu_2:0.28679556025023933 +para_Bleu_3:0.1757988669447671 +para_Bleu_4:0.11061748158923715 +para_METEOR:0.1647238014039032 +para_ROUGE_L:0.3182336912910021 +para_CIDEr:0.21852415031403352 + +overall score of iter 200180: 0.4938654333071738 + +Save model at iter 200180 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +Save Best-model at iter 200180 to checkpoint file. 
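The experiment configs added further down in this diff are thin overlays: each sets a handful of keys (`merge_k_boxes`, `refine_pseudo_stage_num`, ...) and defers everything else to a parent via `base_cfg_path`. A plausible resolver for that inheritance, sketched here since the repository's actual loader is not shown:

```python
import yaml

def load_cfg(path):
    """Resolve a config by recursively merging it over its base_cfg_path parent."""
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    base_path = cfg.pop("base_cfg_path", None)
    if base_path is None:
        return cfg
    merged = load_cfg(base_path)  # resolve the parent first ...
    merged.update(cfg)            # ... then let child keys take precedence
    return merged

# e.g. load_cfg(".../top3_2stage_inscap.yml")["merge_k_boxes"] would resolve to 3
```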
+Save info to info.json +Best epoch: 10 + +Best Model Performance: +Bleu_1:0.1671778590456048 +Bleu_2:0.09077014613023152 +Bleu_3:0.0476684747303012 +Bleu_4:0.02445564298599047 +METEOR:0.08933235383587503 +ROUGE_L:0.1654660162888944 +CIDEr:0.31886265111118334 +Recall:0.5314017615268335 +Precision:0.5831469052945512 +soda_c:0.05853263249839839 +para_Bleu_1:0.46544090189732323 +para_Bleu_2:0.2789325258737778 +para_Bleu_3:0.17172911957785325 +para_Bleu_4:0.10903514181091935 +para_METEOR:0.16550159188298816 +para_ROUGE_L:0.3181118223429575 +para_CIDEr:0.2056618808195008 +avg_proposal_number:-1 + +Best Overall Score epoch10: 1.5812763042668414 + diff --git a/anet_clip/val.log b/anet_clip/val.log new file mode 100644 index 0000000000000000000000000000000000000000..2937f5d88e3790d388f53f3845a2179514931da2 --- /dev/null +++ b/anet_clip/val.log @@ -0,0 +1,21 @@ +Best Model Performance: +Bleu_1:0.1671778590456048 +Bleu_2:0.09077014613023152 +Bleu_3:0.0476684747303012 +Bleu_4:0.02445564298599047 +METEOR:0.08933235383587503 +ROUGE_L:0.1654660162888944 +CIDEr:0.31886265111118334 +Recall:0.5314017615268335 +Precision:0.5831469052945512 +soda_c:0.05853263249839839 +para_Bleu_1:0.46544090189732323 +para_Bleu_2:0.2789325258737778 +para_Bleu_3:0.17172911957785325 +para_Bleu_4:0.10903514181091935 +para_METEOR:0.16550159188298816 +para_ROUGE_L:0.3181118223429575 +para_CIDEr:0.2056618808195008 +avg_proposal_number:-1 + +Best Overall Score epoch10: 1.5812763042668414 diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..63d60c15c02ba592a06fc67e09c654d568891054 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..9569dff52f023f43117ca926bbde3e1f14003fdd --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..0b28f2bcdbfef92df0153ebf03faaa2bc73158a1 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c9314eb31f87ff6f0f44cbcf948b2c4224a9eafa --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa91240b87d388c80f808d8a78858fc60e197ed5 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..201c1ff4ff577f8ff9d247b699a0118d13adb728 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_clip-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml 
+ + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..6a34d7ea66b7574d48f980820ae8fd055632c014 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top20_r2_iter3_th2_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..8f7cc5a1f6a8314a0fb47ec38587b39870114639 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..423e5ab89a334fa4ddb0234345c578eee20851cd --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..4a5acd993bcf0a0f439319aef297c4eaf9ec2b15 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..70fa415758e8d88826ce9466a1533dbf91cbcf95 --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1bba6ba8d60d7c73b4a8f81a30d3bfafbcc6c1bf --- /dev/null +++ b/yc2_univl/backup/cfgs/anet_univl-simop_order_v2_top30_r2_iter3_th2_refine_aug(8,0.02)_top5_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 30 +width_ratio: 2 # scale for the width of the network +iteration: 3 +width_th: 2 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..864c3a0fc0ada3b8ae6d5c81edc5d12586d3123e --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 
+iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a34730f54add9830465c52e42fbfc9536b95a29 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..de7481364454b43fb87a1655b09d949110b25c5c --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..34eecf5b883e5b9c6f750e1e747313b6202c5291 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..61d3e52456e1ea6b0d6726c159018cade8a37d34 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 2 
+use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a07a5868fc11247094f40d8d8350e9088832eb8d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d52bdad23f666dc1d065a692affd57950d91ee5 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ba6d107bf6ea0c714bdcee68f97e22bf3d94f03 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..002f85af4093414f60b6e37e5edc14b204758ac1 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 
+gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..b3f476c95ea36a4bf987b132a41393c5d09ef19c --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..b505369a59c4e6956fc3222dea1d31be4a831ff8 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..b3dd203151d68d04d7367c1bb92602c4c9c44036 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 
+mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..fcfaef85faa02aabfc3633861e2c3b3f88ec2b66 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..97fbc46c507b17e85a2c8ec633ac1d01645609d3 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..d01e18020cd386eb7f5db9fd00662eb3992740eb --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a678cdbd0c0195b00d3315750ec658810a0bfaa --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 
+merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6c339820bdc37d7f054932b6d74615188021197d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c19e8416b340d0c95e10fd3390f640a07f7184f5 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7cf973f734fd70cea269c6a60dc0093e29bbc04 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b4b417c3f5f2d549211c84b1587f94bafa5cf9a --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk 
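Every config in this sweep is a thin override file: it names a parent via base_cfg_path (which can itself point at a further parent, e.g. the cfgs_base/howto/*.yml files) and replaces only a handful of keys such as top_frames, width_ratio, iteration, and width_th. A minimal sketch of how such layered YAML files could be resolved, assuming a hypothetical load_cfg helper rather than PDVC's actual loader:

import yaml

def load_cfg(path):
    # Read the child config, then recursively fold in its parent chain;
    # keys set in the child override the same keys in the parent.
    # Hypothetical helper for illustration, not PDVC's own code.
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    base_path = cfg.pop('base_cfg_path', None)
    if base_path:
        base = load_cfg(base_path)  # parents may chain further up
        base.update(cfg)            # child values win over the parent's
        cfg = base
    return cfg

cfg = load_cfg('cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml')
print(cfg['top_frames'], cfg['width_th'])  # expected: 40 2

Read this way, the long filenames simply spell out the overrides: topk40 corresponds to top_frames: 40, r1 to width_ratio: 1, th2 to width_th: 2, and aug(8,0.02) to pseudo_box_aug_num: 8 with pseudo_box_aug_ratio: 0.02.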
diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f144a283f2b705dfac08821fb6b1038d07d4fe7b --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..0141ba07a86d4a5fc85962f8185ca6771c0f149d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dde138a7b891c588267ba53f189063824fc4fbb --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..b8d051a83ac473fc18d14c10b8226dc414381d9c --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file 
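The refine_aug(8,0.02) variants above enable pseudo-box augmentation through three keys: pseudo_box_aug_num: 8, pseudo_box_aug_ratio: 0.02, and pseudo_box_aug_mode: random_range. A plausible reading, sketched under the assumption that each pseudo box is a 1-D temporal segment whose center and width are jittered by at most the given ratio; this is an illustration, not the confirmed PDVC behavior:

import random

def augment_pseudo_box(center, width, aug_num=8, aug_ratio=0.02):
    # Emit aug_num jittered copies of one temporal pseudo box; each copy
    # shifts the center and rescales the width by up to +/- aug_ratio.
    # Assumed semantics of pseudo_box_aug_mode: random_range.
    jittered = []
    for _ in range(aug_num):
        c = center + random.uniform(-aug_ratio, aug_ratio) * width
        w = width * (1.0 + random.uniform(-aug_ratio, aug_ratio))
        jittered.append((c, max(w, 1e-6)))  # keep widths strictly positive
    return jittered

print(augment_pseudo_box(0.5, 0.2))

With a ratio of 0.02 the copies stay within two percent of the source box, so the augmentation densifies supervision around each pseudo label rather than exploring new temporal locations.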
diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c4d48bc7e63e6428984d0bc2129742f2f7dbc262 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..7555c91df9a6110009920a7b5ac22c155cc59cfe --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..820f13ab195e62fe83b6a5c8d8086ef3ffb62b28 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..62551ec728f7b1283c495996b80d72abe2302686 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml 
b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..3a2cda8b9fa5b1093cd4327dcc407bff408a00e6 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-anet_anet_univl_topk40_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..e270b46619490eec7d96e25950138a4e96238d6a --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..65ca7f9d880f365ced5096533819011ad152b1be --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c9d59a3f0a2f985360f987d74142d74c3ad8ce9b --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..87364862dc54a01ff7835edf337d948ef7aff565 
--- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..73c09505c83eae12bc26b6b16f8e4239aa5914d8 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..ff51f66d82ad89fac2bd3d3340d30d2d2d5c1885 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..a9391a3c8b96f98d601212fe5fee01d56fd73b2f --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..756953c9ca6e0fc91efe05cd28cd8d01f18c1700 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: 
cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..fdc8083bf18fbabbcdc25a93b1f095f6a276a544 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c6bbfc78122b62b4c6f8bb4abae8adc706e30ad --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_puyu.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8527b0a6296d1a970dca3691f3d31ea3dfa281d --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d9d74565b11b912a681a1871360ed0bd2385ff9 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_clip_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 
+merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..57f86183de530604294a363c8387c1c8b49e93af --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..de9accc5c444a46a8639dc63b3d2841b6b1fdb28 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 15 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..a600901cb9f10422e2ce532cb8c77ac03dc57959 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..29e6ac298128de00d5eafc256f3cefc35eb26585 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk20_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline 
at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml new file mode 100644 index 0000000000000000000000000000000000000000..dae41f38476b77921cdb2f030d11a1f32076622b --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1.yml @@ -0,0 +1,14 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml + + +refine_pseudo_box: 0 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/yc2_univl/backup/cfgs/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..3741fe96fbe15b96ea12feca4e9fa98e58b4b141 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ee94d104a9e0878da2aa2e588adeb888ff12355 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 
0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc15339112fa0ff01c5615b311bebee685e3c089 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..38f4cd642822a36efd860f20971030093b467b26 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..c08068e50a1b52db2346ed7d91f994822ecb308a --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..e3d37e0af57206d3a8ace41cd93cac2d92a99aad --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_clip-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml 
@@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..39fac1ab1f5ba0ef8be9166ab400b0303dab3c55 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top2_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 2 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..2342e4a5a58d9938a847f0bf11ea87de5900dadf --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_1stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..d90465e8281858d7557440ae000a2d8030b5f1be --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca08a4041f2660b9eef0f6db53a672d88bfaa52e --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top3_3stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 3 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..18d258a9eff34e7bbbcdebcd0462250746352d21 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top4_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 4 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..739efd5c2d9c526fac569d14c81206c02a677755 --- /dev/null +++ b/yc2_univl/backup/cfgs/yc2_univl-simop_order_v2_top15_r1_iter3_th1_refine_aug(8,0.02)_top5_2stage_inscap.yml @@ -0,0 +1,20 @@ +id: '' +base_cfg_path: cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +top_frames: 15 +width_ratio: 1 # scale for the width of the network +iteration: 3 +width_th: 1 + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 5 +pseudo_box_type: similarity_op_order_v2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_CLIP_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..dbc433e0a1e0d5b37361a96e3970c0d720639db4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_CLIP_pdvc.yml @@ -0,0 +1,17 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d4e2a056537258e7c06849d22d4c26c7b25e223f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_UniVL_pdvc.yml @@ -0,0 +1,17 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..72849bd96ab440b568774e6ee8a57f6ed6788162 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_abox_CLIP_pdvc.yml @@ -0,0 +1,27 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: similarity +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..fbcd022d8623d2fa4e95c31b1f8f6adef8076c1f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_abox_UniVL_pdvc.yml @@ -0,0 +1,27 @@ +id: base # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: similarity +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file 
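The sweeps above vary merge_k_boxes from 2 to 5 while holding merge_criterion: ins_cap_topk fixed, which suggests that the k best candidate boxes, ranked by some instance-captioning score, are fused into a single pseudo label. One way such a fusion could look; both the ranking signal and the score-weighted averaging are assumptions, not the confirmed implementation:

def merge_topk_boxes(boxes, ins_cap_scores, k=3):
    # Keep the k candidates with the highest (assumed) instance-captioning
    # score and fuse their boundaries by score-weighted averaging.
    ranked = sorted(zip(ins_cap_scores, boxes), reverse=True)[:k]
    total = sum(score for score, _ in ranked)
    start = sum(score * box[0] for score, box in ranked) / total
    end = sum(score * box[1] for score, box in ranked) / total
    return (start, end)

# Example: the two strongest of three candidate segments dominate the merge.
print(merge_topk_boxes([(0.10, 0.40), (0.12, 0.50), (0.30, 0.90)],
                       [0.9, 0.8, 0.2], k=2))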
diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa5891c2d59ff2e88a6ccbca706f7ca15f539976 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d3fff7669479f43563fb8f80dc98091b835cd49 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d00afb6f6a5f3d979ee0b513299460ed59528d71 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + 
+use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b56d4945517c6bd61351fcbef05b48c9b7448d25 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ee0f7cd39031aac12a49fa3febbad874ee84eb3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ee646dbe8a44dc5bd827e2ff354f954306512e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcc52404af176f94a0d3cecc3fcff26900f73e07 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c22d10c1f33975e94f1777fc6529b90feb81ba71 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..5a4ee8540f6cbd301e407c8fd4795518a729b2a7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e5cc7747b0917df6ff67aa7af2dcc961853d3643 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8ec8379c8cfae0384ed5c5a23cdc8ba28b250d8 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 
+cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7174b3bc97912d16e2db4f3948daf1d5ccca79d2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..047111607ce477a768f570d0c1bdbe1809fd2b27 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ec22f2c3149a6bc2a839c8b7a1e7f95f799a740b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcd7b440c4ff76e8b2cfa3324524c6a0fb7f8b3e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ceeeab0f8eddaef985f7f96f30092bf3be1c477 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml new file 
mode 100644 index 0000000000000000000000000000000000000000..b9514fabfbb681385913ea0ab7edaedaaa62b628 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..521dc511180da8f9b94569e4ab0a45844266d973 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c59ddd576b3753f4db9a137e8d3cb80dd233e0a4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbe60a5936fe763099137039a222cd0563ebaea9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9960dcaa94683c69f1c63bd6fadef7b17315ebea --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e57a1150b7f8eb19b75d32729945101e3f63970a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a639c02411b28e7b61485c8306201526a98bb30 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..09300cdacf1b21dae6e6d983cc53c0c3674f146c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d933f4e367996fcd355585308151316fde844160 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9289c03643143a572b64987cf8b733118531b7a2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ecd4bd64bff3f0c9ec4c9a532146ccd657edc907 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 
+refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d228dc7e9de40c84d91a90b4e9e3accd41af0d7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6fe87609b78a2e77ad203ca2136882ba13568493 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d21448627979c6757d3299faee2ea5ab4d2d1b09 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc8c5a5dc77dae477e0f880e515993a46357a8c8 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..665f42194576bed58d657628d916710efa51b514 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 
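The anchor-based variants above all share the same augmentation switches (pseudo_box_aug, pseudo_box_aug_num: 5, pseudo_box_aug_ratio: 0.3) and differ only in pseudo_box_type. The augmentation code itself is not part of this diff; purely as a hypothetical sketch of the random_range mode used by the experiment configs earlier in the diff, jitter over a temporal (center, width) pseudo box might look like this (all names and semantics are assumptions, not the repository's implementation):

import random

def augment_pseudo_box(center, width, aug_num=5, aug_ratio=0.3):
    # Produce aug_num jittered copies of a (center, width) segment in [0, 1],
    # perturbing each endpoint by up to aug_ratio * width. Hypothetical only.
    boxes = [(center, width)]  # keep the original pseudo box
    for _ in range(aug_num):
        delta = aug_ratio * width
        start = center - width / 2 + random.uniform(-delta, delta)
        end = center + width / 2 + random.uniform(-delta, delta)
        lo, hi = max(0.0, min(start, end)), min(1.0, max(start, end))
        boxes.append(((lo + hi) / 2, hi - lo))  # clamp back to (center, width)
    return boxes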
diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc8f3897e2e398bf6cedf9c02936553098f2c73 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ac2b451a19de13876e1d0dd042878289fdaa195 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..935b0ececa15dcf4658c1e10a5ae52b93079b0fc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc.yml @@ -0,0 +1,11 @@ +id: anet_c3d_pdvc # the results and logs will be saved in this folder ./save/id +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..e0db6b87acea5ffa66e35e868e44194a04c39852 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: anet_c3d_pdvc_gt +base_cfg_path:
cfgs_base/anet/anet_c3d_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..828311fc71fcc95e9b1a08506d11bb6ab602b665 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl.yml @@ -0,0 +1,53 @@ +id: anet_c3d_pdvcl + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..02b38b6f2dbbb53b838d9bfbab8cf268a7c02c62 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_pdvcl_gt.yml @@ -0,0 +1,55 @@ +id: anet_c3d_pdvcl_gt + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.00001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 
+transformer_input_type: gt_proposals \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_c3d_props.yml b/yc2_univl/backup/cfgs_base/anet/anet_c3d_props.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d2aa20fce1241e60ad77a69980acf1e3b653ef1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_c3d_props.yml @@ -0,0 +1,51 @@ +id: anet_c3d_props +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] + +train_proposal_type: gt +train_proposal_sample_num: 15 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 10 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: none +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 0 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..147d726179a848dabb0367b22575fa2f20de4097 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc +base_cfg_path: cfgs_base/anet_c3d_pdvc.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d/', 'data/anet/features/i3d/', 'data/anet/features/vggish/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a6991e551815ec0ac234c30ab3a6d09f1bd75cf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_i3dvgg_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_i3dvgg_pdvc_gt +base_cfg_path: cfgs_base/anet_c3d_pdvc_gt.yml +visual_feature_type: ['i3d_rgb', 'i3d_flow', 'vggish'] +visual_feature_folder: ['data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/i3d_25fps_stack64step64_2stream_npy/', 'data/anet/features/vggish_npy/'] +invalid_video_json: ['data/anet/features/I3D_vggish_invalid_videos.json'] +feature_dim: 2176 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..f30c2ab5a626538f4dbc2c1a1bc497196ff46f24 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d3b50035fe6a95bb2a5790f8b3611be54fc0fa7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..270ae993e3f72bc2d9091b4809e8715fc6c86dae --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7340a306c59e2ac685e8c59ac2960fde366e9c7b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..16f3082ecb947291ccb4f2226312fcf3fa06d349 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c167a9b0499503b9eff84d0c1ea1aa42453cf117 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 
+top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c48adf788535e24daf8e7ffe16f2e60009118f1f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..65d5217788315daae5e6bbf002eb746b010e2bde --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9dc19b7dcbc2f364b03abc0014d17eb6375b4a99 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042d3f885aff7b261f66fa03dad252aedbf2fcf9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f6e44731cf2c826b91b8173148e120b64d04f66 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e0f151d036988969e902c436085fa850bb50a4d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e44527dea994822b026b814df9b354aff082b53 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..08f52e1ac3b1ebca5e2d62c639fd5f6b5752ddf9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..bd202e1359b3d756d903c93acf07e4dad268323e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3fe14dc05390932f58a3f5ce8b3ffa3828296200 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..211b0adc17f02c1b64ce3ceff2c0122c7581eada --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..674edd157c6ffe26bd7e9248faffc1a68a997d35 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..20243b5ac8be187c91a7e54eb86cc27db6f21559 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cb675d9549b34cf0ee2258d3e6a107273d1e4ffd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..586821983af378162d355b298b2788e0c651e0e6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..42476cbf72bb3d8b304e89192a30bda1606046aa --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0a8e97b44c541a99afc965764187cf264bd4268 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9e0ce1a0db8dd453b811f6f0d5609f8ae7648a6d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8725e365b44b967e794cdc16423a571c71e33bd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..2fd6450a47d8392d35b67b997ec6173f35b6ee4b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5cd314fb9fd20fa05b1b5417604b40d115ee008e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] 
+text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..23e3dae52ca63a879083d7eafc8c1ab7e1556d71 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4a108d38d2c512864344770d5943e439eb151d5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml new file mode 
100644 index 0000000000000000000000000000000000000000..c30ad37380e6b9ef2b845061471ab2a4ff293d91 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b140662d119689dca4e409f85da91e882323bc0f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5e03028433a6bbc1c25ae936d791096e8e7826b2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + 
+caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ad1160b06416b51bd4e728eef5e6225f023796c0 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..f56ac030b287f6c0a806833b546d90a9d8fe9670 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..79f9caa36975efda224cb605af412efda721e7dc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml 
b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..c748cd44f1b9ea7607e4482da4af8444347d3f88 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..5543e4e259942b72d98f1fe16cd4311be93ef3c7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9804be364f78a4a8f26e30e0e6923558194edcd9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsn_pdvcl_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsn_pdvcl_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl_gt.yml +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/anet/features/resnet_bn', 'data/anet/features/resnet_bn'] +invalid_video_json: ['data/anet/features/resnet_bn_invalid_videos.json'] +feature_dim: 3072 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c4ef82922a7df99f37d1a626d4a89e8c9b95722 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..df92966691ed0fa33bc4b7417f6c0ade5b383869 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvc_gt.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvc_gt +base_cfg_path: cfgs_base/anet/anet_c3d_pdvc_gt.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 diff --git a/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvcl.yml b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..c5298c707ab8887be611c86d522e855a8a5123a4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/anet/anet_tsp_pdvcl.yml @@ -0,0 +1,6 @@ +id: anet_tsp_pdvcl +base_cfg_path: cfgs_base/anet/anet_c3d_pdvcl.yml +visual_feature_type: ['tsp'] +visual_feature_folder: ['data/anet/features/tsp'] +invalid_video_json: [] +feature_dim: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet.yml 
b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..d83ae5b6762ddc39bcdab2aedddf47a6ed8571d3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..889d543d5b813b8a574700f9ad209fd237144075 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_mixlm_anet.json +vocab_size: 18884 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 
+transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a7bf2a745aecc0b05232f717c81a97333ee55af3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-anet_anet_puyu.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_puyu_anet.json +vocab_size: 15249 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..17b3bd0263edd713fc329bf1df7b539e2f160b3d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file_for_sim: 
data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..36d19db653936c2342b12bfc603de32b2295e287 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_mixlm_yc2.json +vocab_size: 17447 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..0f9ec30bf455a8a9d51bb867bdbc8e4d514c8006 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto-yc2_yc2_puyu.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_puyu.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: 
['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2_puyu.json +vocab_size: 13411 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto_anet.yml b/yc2_univl/backup/cfgs_base/howto/base_howto_anet.yml new file mode 100644 index 0000000000000000000000000000000000000000..3deec04627b419ff129a14bcf6ef5f8382bca7af --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto_anet.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_anet.json +vocab_size: 16221 + + + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/base_howto_yc2.yml b/yc2_univl/backup/cfgs_base/howto/base_howto_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..85343a3924a24e42054f963f220b2a3e93769070 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/base_howto_yc2.yml @@ -0,0 +1,62 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 
+invalid_video_json: [] +train_caption_file: 'data/howto/captiondata/howto100m_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_yc2.json +vocab_size: 14538 +# dict_file: data/howto/vocabulary_howto_rate2.json +# vocab_size: 14432 + + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..92c6b64a0b9a276122b86cabe3ad428fa8fd6c8a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..87fd2de7d0e97619b3774084d4de97485ac0eedd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage 
+base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..dfb930405a3050a89e929c9219635231b546d3cb --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..15f8524313eeff1620c67764a09bb5268d50c249 --- /dev/null +++ 
b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..da4400435c82abc6bc70c758bdbb8d52b9d68cc2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c19e4b987a23b04a75b0eba01abfe0de360783c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6141c44f52d807457f9cf0c759ae34f0ce6c024c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 
+att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..57427fb4f57b5c5b3518de52811ee7d31f799017 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f45381d945d502f5a7d421ac2e2d17a7abcd5d87 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: 
standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..351d2d32411e886fb1dbbe52674c262b39e1ca77 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..93a5cabefb35f8f40517cd74d3cee811008d4659 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 
+width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..691b7faef9301ea5d8b205f226ecdbfe7f0618c9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..73f5f0634fa1d4b00a2fb49f1793d72e67c16c87 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 
+hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..6b3775fd1a72294b76078cdc49baf850ed5056ca --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..078379e88f4811421e58a8d7930e932ca6641e24 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a98bb3237ea032ebfca52ae34e59d88aa3592ffa --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..48afb4f1afd1ca36d3e0c6689d2e7d562099cbeb --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f3192d4aa5b4456deba35f0f4e404a2b11fa7e00 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b6b1224667dae251754c76aeaccbf93a63893e54 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..62f4d92634f0885f13107332cc7e23b824ff8596 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5545fc09291a7589f59c8725f38a17144f43f826 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c202054206ca67c6885cf4a34bf12a2c9eb163ef --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..64486f4d57b00de9c7b0c403ff49749c7cccee4a --- 
/dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebd15f0c193ea72d878905401027fd28a330a6e9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dd87d3485fe65777b79be423ffe881796fe879 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..29489663091e3ca5546b6d55f9937dc01ff97a8e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 
+contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9bf87a23994a2adb512b6fb1d1a2188f70de82a6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..271b44b03d385f7ad93fdfff959e8b41486451df --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 
1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..bf6ced9cc86151f3c804201b38a127a8cb2f5381 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9779a1898513b16aa1c2aaa57f4eb6255b2f9253 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 
+gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..996e5360fc812f1943959c0bc468974117a97a94 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..472fdf294e8ba71f16708011d3d8529e5b32800c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: 
similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..c56573da93d2d948eae7d723904a9243498ac484 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4940aa43eec6d5fcb08905bac7892f10b28c7549 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..9505834e0b888345ad6b73283848e9717a58ac98 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f373f5e6dfc0a586f104777611c18ad10dcddb9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..e28db709d1590f546524626df9b13676034d0489 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..753fdd06b9fbb789f5f5215ce9ae73d2f694bf12 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', 
'/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f8bf9b58b304ea4bc7becf5368d3025b192e5ea5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c56556156aa50810c468bb22a2d00ace2213860 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ca13a378e8eacef7f262ae789b9ffa7346c34e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ca13a378e8eacef7f262ae789b9ffa7346c34e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + 
+visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml new file mode 100644 index 
0000000000000000000000000000000000000000..4bc67bf5e3278f9c16228a024efbf3ffa703b854 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_puyu_v0.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ac92f31028eece0c85e6cc642876b3fee015063 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..cae9be1bd91a74c7b263399c84dd4e0ff80849b4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..798dc7b939e23386a864ee0ac3f53f2628b7138d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + 
+self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b5ab9e0bdfca3c12d8c932e52a0e0e20bf6e759a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..36061ccd68f650aef565bc8ce8a31be53eebf41a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + 
+use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..54a0ceb172e743011c5a9183cbc31c39d3084019 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e32f2b3092a0f11c35bf4a2ab0ab62172c23815 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9205bfd2568d41ec26689b63fcdb82f30f1e0c7b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,48 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + + + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..2ed0b046f4922fec52231c7c0d5c551cff82f0cc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + 
+self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..b40c179f05dff58d76a601a232e6fada42f29505 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..32e237518518df60263b914b65d18dba1d0b8f46 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml 
b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1dcb3b5b547fda7ed525234c4f97976badc60c04 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..6f775d16de8d0ca478c89310cf56ae9cc12b6d6e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..04d7bda3fde5f365542bbd033cee609457c89604 --- /dev/null +++ 
b/yc2_univl/backup/cfgs_base/howto/howto_anet_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_anet.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 30 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebb97f21d137b0826c96dfea0a86e234195d39be --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..47aa1ebe8a83f7375281ca97c736b47b312d7806 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86ad0779edfff76fd33862932cd3b534902be794 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..11cbc212d5837924bc8afb7d4635427be51ee216 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..0dcf153ed24336b039f3e288035937de41f36a94 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..424cf659ab8edb6fc4b81364ce47a2d568f65c07 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 
+pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..53a57713fe15fb61233549dc5ecb309e986a5508 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..13ca9fd266ebd75d23197a8e51ad913227640b06 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 
+window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..f5f9a193a786aad4960447288edf675aecb58129 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_CLIP_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8dbabf6d032f5848991215aec06c7fcef0fb711 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_UniVL_pdvc.yml @@ -0,0 +1,21 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_anchor: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7a064f9627e6d2f00667c740f27f3564ce16d63 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_abox_UniVL_pdvc.yml @@ -0,0 +1,32 @@ +id: base +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.1 +pseudo_box_type: align +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..ef7c8924196f230fbf7abf4bd2aada4d8c813275 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..355c7d66081451b3513304f8b81126fd1976f918 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..5121b6cf76c3b4966c382f4cfca11c71891a721c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 
+cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a95a0e782daf8c2f799f90d93aa95a6eceb015b2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..59967b4b1bc17087eda8b17e520c3e83e153e699 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..178db2ad770c80e3d3d365842e7df6f2a24c0fee --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 
+pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7555f552813b710ea274c5068b4b4b46b15e00e8 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..726c043ff0300b678776e8391d1bb3ed962307cf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..71be5f3eb1240f38e131b7600e58cf1a1b3f7b3d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..bc03cac62cb816ce7b98e9fcf4ff96167d03b682 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d054ac67dc4a7765def67b554b1c520c04cd18da --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 
+ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..27e89d7b820b566c825bf2be4e6164f218664588 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..39dfc692b19e2acf130606f314214410f0720990 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..fe6cba647bb5d104b990a3415706d56971426b1f --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 
1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..05c7d7053dd25cb4a3e06340268c29a14d7dc05e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..40fd330d45cf07a8e34c442d56c1339fa237c5e1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd4ead981978c54801c70e6dec9fc4154f6fcf40 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: 
cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..86b411963f0ab337fa843b01a49b743c55d0f4ba --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4b959243136e53599aa026ae7a386d9ae8ce41a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml 
b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..259b30520a7d6eb411b14497d97e2b47939933ae --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..59ad8e64a36d46297e604044ab9fa8c0fe8bf3e1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3f8a497a405c767f138ba08ec811b417d62e5674 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: 
UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..0aebca8432993dd31f123e618c40a701da8ed968 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..29a1b716404048c53360c2e95eb69510f03eb1e9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..9c2142a150ea00790b8c18556d0004bfa89afae8 --- /dev/null +++
b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b624be12c30a6132baf378a696b5d85949754b5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 15 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..3aedf7494857125480dd5c5dc50bd3a5cf9f4260 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP
+disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5104cde9760d48a7f9fa23cb1a04b825c18f3844 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8bca42c4f97b02f9f9bb5f73f37c7660a2c17f64 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f75a379f584c0e1c6937f7382106b41aa13de36b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..70bb98540afb82ad228fd38386b705eac0186b43 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..377d0315f95c91a695c3452a623a2178f36f8b5e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml 
b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a43f4fbb5db27f501c2e18f01a4ce609982ca832 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..edb891d5da98423d456c7b80ab35cc9a50143577 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..b51463e36ec85d9216a6644801548bae016228fe --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..349183f42fc665a103e3b1b628b6f055bfbaee2d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/tasty/tasty_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..87138b61dc4b554deebc0245686a152f593825fc --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file: 'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/voc_tasty_14.json +vocab_size: 14670 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 # set below 42, the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml new file mode 100644
index 0000000000000000000000000000000000000000..1a82a7274fb3f956c8545096bdf86e3e1f9c0468 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/tasty/tasty_tsn_pdvcl_voc30.yml @@ -0,0 +1,57 @@ +id: tasty_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/tasty/captiondata/tasty_train.json' +val_caption_file: 'data/tasty/captiondata/tasty_test.json' +gt_file_for_eval: ['data/tasty/captiondata/tasty_test.json'] +gt_file_for_para_eval: ['data/tasty/captiondata/para/tasty_test_para.json'] +dict_file: data/tasty/vocabulary_tasty.json +vocab_size: 30171 +max_caption_len: 50 + +train_proposal_type: gt +train_proposal_sample_num: 50 +gt_proposal_sample_num: 50 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: standard +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 42 # 42 is the max number of events in tasty +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml b/yc2_univl/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml new file mode 100644 index 0000000000000000000000000000000000000000..d14b206d40c4f2399400913d1ff15b8659b575b9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/vlep/base_vlep-yc2_yc2.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/vlep/captiondata/vlep_meta.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/vlep/vlep_vocabulary_rate2_yc2.json +vocab_size: 4491 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..cccf2ec29dc7c513d8b4cc90d4a6f6a3fabbc28d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/vlep/vlep-yc2_yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,44 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/vlep/base_vlep-yc2_yc2.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/output/vlep_clip_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/vlep/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..00399b0c2f2a021a7476750b003026045d776cd1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_UniVL_pdvc.yml @@ -0,0 +1,20 @@ +id: yc2_UniVL_pdvc +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] + +feature_dim: 768 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..ab29c8850e08c4549496f768a65f7ff4d08f33ba --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ViP_pdvc.yml @@ -0,0 +1,19 @@ +id: yc2_ViP_pdvc_norm +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text/'] +feature_dim: 512 +hidden_dim: 512 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git 
a/yc2_univl/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8ecdedb6ef1d7787c74040ef64b644bfb98d956 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_abox_ViP_pdvc.yml @@ -0,0 +1,29 @@ +id: yc2_abox_ViP_pseudo_similarity +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP-ViP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/visual_norm'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/ViP_features/text'] +feature_dim: 512 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_type: similarity +use_anchor: 0 +pretrained_language_model: CLIP-ViP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c15a2a7c6e8dea1452f91f26dda6fe9fb6ebe8f7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..217693d7afa3593227e2d368fdb2552cd9371369 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + 
+num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..13eef191acd1177c2d4e7bdc64f2b755e80ab5e5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..88bd8b2fec5674da63b5171fa4c7bfeee0426fc9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..4846c148573235eb6aa047cb024e6c78a4e1cba2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 
+merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..4b329f14bec0848ba829c1b4048e7fe22fb46e83 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d62fd00faf076f269351f19da5083817b419ff0 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e25901b1bf82142d41e05343f65e1ba59b8d908b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..77143cc57f11432ea5001da37c0014eb1696acc2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..87ff91cad79953a42bb1b582e8170779d1e147c7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml new file 
mode 100644 index 0000000000000000000000000000000000000000..388cbc2b2527190f4ccf17c7006ec9adee33ae5e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8b7d7cecca9a90d08a1fe516e115891645fb0b6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c49641fdda1e81d5e713e9b615d3d1186a7fea7d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + 
+caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..dbb37176adcdc33825abe0ed4a943588ad157f4a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b271a74c0f4f3d976410fcc2b607d12056124d08 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d91010f653fe7c8f52caead5e7737a8ea102fd9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..bed129c0b6368d63e430310d7caa0ce4a633e329 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca26e6a13b9df3b80e91cf34b8668676895d6214 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml new file mode 100644 
index 0000000000000000000000000000000000000000..c5086a3c1ab5482fd8595d66ebd5cecd3da4502c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ec5cdeb72ada972c4b0ec906ed3d96060a7e018 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..9d56829b138e152c73a8513f85f3f086c40ae838 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 
+pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..042ae96b01a2c741fe1304441db5bd0ca14109b4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e9f47c5587ae99e197c9e4c2d87ccc4545923143 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..78f0529a85faecb3ca48833e213e1be64072439a --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] 
+visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..615d745a91273d92d158ecbd28e9eb7e5ec77640 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..f2fe9580c80c0a6c19ebd865cc9625a7f3b997ec --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git 
a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a3866db1b5556c8b1c7a3f37a37686bdc2170a13 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..707926c6592afdac874f5ba9bafb7c3855e9b18e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbc2e0503505aa158c62d8f3687cce87a45ece37 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim 
+use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ff93b47407b09e7af54d101c6dfcab1d359c1d3 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8bdddf2857f8baee722f798460eee584a75e07e --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..35cf1c1bee6180fdd84cf580e3266c24f80bfb2b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: 
['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c68cd4381b5ddc5447b4da98c968702fcfad52d2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1dcf629977378040564bf4b9256f66fe8a76282 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_anc_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..6768351e467f9cdcfb4cf503621faee1d28da7d5 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..48c7d7ff81d64b9b885be7e8c9c114d5b3a177c6 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..a2880d6336436d6d2da1330e3835323340f5ddd7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 
+soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..96d7bc4b0db3ea7647ac39f487d43fe96d1b9846 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(align)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: align +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dd2d07dd9d060137eeb3f135a7e2a420d110fdf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..6043b9b8e94f7f09c976fd10fd340ce0370dbc21 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 
+refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef3bd5c6fccf6e8fa460e56fbc89d5cda0ad5a16 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ba9e35266c5ecce655d98fbda735fffd5505ab3c --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..373ef4c276a633c7923d9cdf106d81863573f378 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP.yml @@ -0,0 +1,38 @@ +id: 
refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e6e781eb010da1f9c67a8362776119348f972795 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..c2b90ed1817ba9b5af6f71c7aa48499550fed905 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + 
+self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..71cdb3e5f6a305eeefcf987c447ad38ce2c0b9eb --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..14e4add2192671c62c3c5ecab34be01db392ead2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..658a48fbf1a31ce7522f65e4f04453965f1a1130 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 
+pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8c2448ad587b99db12d4004259da8ad83c400547 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca9efe1df117e81a57d9f4c0b37dccef8ce071a4 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml new file mode 100644 index 
0000000000000000000000000000000000000000..1ae2b3a1874058268562185df28b4568088518bd --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f416921a9d59d98d643118774215085f50e4adf --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_CLIP_refine.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a43d8c6e42cb71191f4db57266b12c2f8ff1df7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 
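The pseudo_box_aug_num / pseudo_box_aug_ratio pair recurs across these variants: when pseudo_box_aug is on, each pseudo box is duplicated with jittered boundaries. A sketch of one plausible jitter scheme; the function name and the uniform, width-proportional noise model are assumptions rather than the repo's exact logic:

import numpy as np

def augment_pseudo_boxes(boxes, aug_num=5, aug_ratio=0.3, seed=None):
    # For each (start, end) pseudo box, emit aug_num copies whose endpoints
    # are shifted by up to aug_ratio of the box width.
    rng = np.random.default_rng(seed)
    out = []
    for start, end in boxes:
        shift = (end - start) * aug_ratio
        for _ in range(aug_num):
            ds, de = rng.uniform(-shift, shift, size=2)
            s = max(0.0, start + ds)
            out.append((s, max(s, end + de)))  # keep the box non-degenerate
    return out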
+ +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ce8c21fb91989e7b37c1504989be65eaa5c17c1 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v1)_UniVL_refine.yml @@ -0,0 +1,42 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v1 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..25c75deb4061ea7fe6c59f5d63d26b85c332f8ce --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..c1a0e79d75681650dce5753ea1934f39e05931ba --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_CLIP_refine.yml @@ -0,0 +1,43 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + 
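The similarity_op_order_v2 family that begins here adds width_th (and, in the refine variants, top_frames, window_size and statistic_mode). The code consuming these keys lives elsewhere in the repo; purely as an illustration of the top-frames idea, a caption's pseudo segment can be read off its caption-to-frame similarity row as the span covered by the most similar frames, with width_th presumably rejecting implausible spans:

import numpy as np

def span_of_top_frames(sim_row, top_frames=10):
    # sim_row: one caption's similarity to every frame, shape [T].
    # The pseudo segment is the span covered by the top_frames best frames.
    top_idx = np.sort(np.argsort(sim_row)[-top_frames:])
    return int(top_idx[0]), int(top_idx[-1]) + 1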
+visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e6f5990069b64bd922d068c9721144ee3ed8467 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL.yml @@ -0,0 +1,39 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..a061bab74963587e8c148e67c47ba3e8d8e7bdc9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -0,0 +1,43 @@ +id: basic +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +width_th: 0.5 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 
+cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..6cf3256fca24a4cdaa0c9e8c89a4fed74edd684b --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..e13936ee7c32a85bb8553de1bddf8dc85f6acdd7 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..31d2af3cfa6d15ec3ce1067648dcb89cf410d309 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + 
+use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..1d5096ca8903c240faa1e45589e77786800bf4d9 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_index)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_index +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..c04663ab87edcd38ea5a0fa00c17d99f3203fc02 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca1d85d39c264126d0cdcad3fb4ef6dcd9d78249 --- /dev/null +++ 
b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_CLIP_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..e431de6395e6a745cf9c9e5f560621c1ef911015 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml new file mode 100644 index 0000000000000000000000000000000000000000..d4b9dec009027bfad79d96f94545c19465146271 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_(weight_sim)_UniVL_refine.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: weight_sim +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 
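For the weight_index / weight_sim families above, one natural construction (illustration only, not a claim about the repository's exact formula) treats normalized similarities as weights over frame indices and reads a (center, width) box off their first two moments:

import numpy as np

def weighted_box(sim_row):
    # Softmax the similarities into weights, then take the weighted mean
    # index as the center and a multiple of the weighted spread as the width.
    w = np.exp(sim_row - sim_row.max())
    w /= w.sum()
    idx = np.arange(len(sim_row))
    center = float((w * idx).sum())
    width = 2.0 * float(np.sqrt((w * (idx - center) ** 2).sum()))
    return center, width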
+ +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml new file mode 100644 index 0000000000000000000000000000000000000000..e66d364a384fa467573af7703d97baf23098c9a2 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_CLIP.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml new file mode 100644 index 0000000000000000000000000000000000000000..60d558dd0b51cbe8d184681d7227c91e76246540 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_ori_GT_UniVL.yml @@ -0,0 +1,38 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/yc2/yc2_tsn_pdvcl.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 0 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc66b3cbff2550bf0264a79dd43d6b93ab7256a0 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc.yml @@ -0,0 +1,13 @@ +id: yc2_tsn_pdvc +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 50 + +ec_alpha: 1.0 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..9a1c528c5c792081cbb4873983306c4268a23d55 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_gt.yml @@ -0,0 +1,9 @@ +id: 
yc2_tsn_pdvc_gt +base_cfg_path: cfgs_base/yc2_tsn_pdvcl_gt.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml new file mode 100644 index 0000000000000000000000000000000000000000..79ef87700f600af96cb41f1953b4fb1da336c8ec --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior.yml @@ -0,0 +1,16 @@ +id: yc2_tsn_pdvc_prior +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml new file mode 100644 index 0000000000000000000000000000000000000000..14941f50b2699cc25e74ee388bfe086ae0bda74d --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvc_prior_add.yml @@ -0,0 +1,18 @@ +id: yc2_tsn_pdvc_prior_add +base_cfg_path: cfgs_base/yc2_tsn_pdvcl.yml + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 +num_queries: 50 + +prior_manner: add + +ec_alpha: 1.0 + +transformer_input_type: prior_proposals + +#dec_layers: 3 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml new file mode 100644 index 0000000000000000000000000000000000000000..1420f8abf88d8bbdd6c9cf05454f0949a9fb6c44 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl.yml @@ -0,0 +1,55 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml new file mode 100644 index 0000000000000000000000000000000000000000..435e85fc3946b15c389de755987b34f8bd75d469 --- /dev/null +++ b/yc2_univl/backup/cfgs_base/yc2/yc2_tsn_pdvcl_gt.yml @@ -0,0 +1,57 @@ +id: 
yc2_tsn_pdvcl_gt + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: 'data/yc2/captiondata/yc2_train.json' +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +dict_file: data/yc2/vocabulary_youcook2.json +vocab_size: 1607 + +train_proposal_type: gt +gt_proposal_sample_num: 30 +sample_method: nearest + +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +#with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0.0001 +set_cost_class: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 0 +bbox_loss_coef: 0 +cls_loss_coef: 0 +count_loss_coef: 0 +#max_eseq_length: 10 +#lloss_cross_entropy: 0 +#lloss_focal_loss: 0 +#lloss_gau_mask: 1 + +#two_stage: 1 +transformer_input_type: gt_proposals \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 
100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/yc2_univl/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/yc2_univl/backup/change_config_add.py b/yc2_univl/backup/change_config_add.py new file mode 100644 index 0000000000000000000000000000000000000000..4b9ecff04cf568dba78df9a67a4a418abc9edf08 --- /dev/null +++ b/yc2_univl/backup/change_config_add.py @@ -0,0 +1,80 @@ +import os +import yaml +import argparse + +# add dryrun option +parser = argparse.ArgumentParser(description='Change config files') +parser.add_argument('--dryrun', action='store_true', help='dryrun') +args = parser.parse_args() + + + + + +# Define the folder containing YAML files +# folder_path = 'cfgs_ref' +# folder_path = 'cfgs_base/anet' +# folder_path = 'cfgs' +folder_path = 'cfgs_yc2_ft_perc' + +file_filter = '' + + + +# Define the string to find and the replacement string +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video' +# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj' +# find_string = 'data/yc2/captiondata/yc2' +# find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text" +find_string = "ft_gt_percent: 0.25" +# find_string = "pdvc_mode: 0" + +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual' +# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text' +# replace_string = 'data/tasty/captiondata/tasty' +# replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml" +replace_string = "ft_gt_percent: 0.75" +# replace_string = "pdvc_mode: 1" + +old_name = 'perc0.25' +new_name = 'perc0.75' + +def replace_yaml(yaml_file_path, new_file_path, old_string, new_string): + # Read the YAML file as text + with open(yaml_file_path, 'r') as file: + yaml_text = file.read() + + # Replace a string (e.g., 'old_string') with another string 
(e.g., 'new_string') + + yaml_text = yaml_text.replace(old_string, new_string) + + # Save the modified text back to a YAML file + with open(new_file_path, 'w') as file: + file.write(yaml_text) + + # # Load the modified YAML data (optional) + # modified_yaml_data = yaml.safe_load(yaml_text) + +# You can now work with the modified_yaml_data as needed + +filelist = os.listdir(folder_path) +# Iterate over the files in the folder +for filename in filelist: + if file_filter not in filename: + continue + # breakpoint() + if (filename.endswith('.yaml') or filename.endswith('.yml')) and old_name in filename: # parentheses matter: 'and' binds tighter than 'or' + # breakpoint() + file_path = os.path.join(folder_path, filename) + if old_name == '': + new_filename = filename.replace('.yml', '_{}.yml'.format(new_name)) + else: + new_filename = filename.replace(old_name, new_name) + new_file_path = os.path.join(folder_path, new_filename) + + if args.dryrun: + print("Dryrun: {} -> {}".format(file_path, new_file_path)) + else: + replace_yaml(file_path, new_file_path, find_string, replace_string) + +print("String replacement completed.") \ No newline at end of file diff --git a/yc2_univl/backup/demo.py b/yc2_univl/backup/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e3ab4946905f140f377d120a14deff85f4622f --- /dev/null +++ b/yc2_univl/backup/demo.py @@ -0,0 +1,44 @@ +import numpy as np + +# Example similarity matrix with shape [10, 200] +similarity_matrix = np.random.rand(10, 200) + +# Example range of indices for each step (stored in center and width arrays) +center = np.random.randint(0, 100, size=(10,)) +width = np.random.randint(10, 20, size=(10,)) + +# Calculate the start and end indices for each step +start_indices = np.clip(center - width // 2, 0, similarity_matrix.shape[1]) +end_indices = np.clip(center + width // 2, 0, similarity_matrix.shape[1]) + +# Generate column indices for each range +col_indices = np.arange(similarity_matrix.shape[1]) + +# Get topk values and corresponding indices +topk = 5 +topk_values = [] +topk_indices = [] + +for start, end in zip(start_indices, end_indices): + # Slice the similarity matrix within the specified range + range_values = similarity_matrix[:, start:end] + + # Find the indices of the topk values within the range + sorted_indices = np.argsort(range_values, axis=1)[:, -topk:] + sorted_indices += start # Adjust indices to the absolute position + + # Flatten and concatenate the indices + row_indices = np.arange(len(sorted_indices))[:, np.newaxis] + indices_flat = np.ravel_multi_index((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()), similarity_matrix.shape) # row indices must be repeated to match the flattened top-k columns + + # Append topk values and indices + topk_values.append(np.take(similarity_matrix, indices_flat)) + topk_indices.append(np.column_stack((row_indices.repeat(topk, axis=1).flatten(), sorted_indices.flatten()))) + +# Convert lists to arrays +topk_values = np.array(topk_values) +topk_indices = np.array(topk_indices) + +print("Topk values within the specified range:", topk_values) +print("Topk indices within the specified range:", topk_indices) + diff --git a/yc2_univl/backup/eval.py b/yc2_univl/backup/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c59801e0e5a9e72ce22521699e53d796efd49b --- /dev/null +++ b/yc2_univl/backup/eval.py @@ -0,0 +1,146 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import json +import os +import sys +import torch +import numpy as np +import time +from os.path import dirname, abspath +
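Before eval.py continues, a note on demo.py above: each pass through its loop gathers top-k entries from every row of similarity_matrix for one window, although the per-step center and width arrays suggest one window per row. Under that reading (an assumption about the demo's intent), a per-row version is shorter and needs no index raveling; it reuses demo.py's variables:

topk_values, topk_indices = [], []
for row, (start, end) in enumerate(zip(start_indices, end_indices)):
    window = similarity_matrix[row, start:end]
    local = np.argsort(window)[-topk:] + start   # absolute column indices of this row's top-k
    topk_indices.append(local)
    topk_values.append(similarity_matrix[row, local])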
+pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + +from eval_utils import evaluate +from pdvc.pdvc import build +from misc.utils import create_logger +from data.video_dataset import PropSeqDataset, collate_fn +from torch.utils.data import DataLoader +from os.path import basename +import pandas as pd + +def create_fake_test_caption_file(metadata_csv_path): + out = {} + df = pd.read_csv(metadata_csv_path) + for i, row in df.iterrows(): + out[basename(row['filename']).split('.')[0]] = {'duration': row['video-duration'], "timestamps": [[0, 0.5]], "sentences":["None"]} + fake_test_json = '.fake_test_json.tmp' + json.dump(out, open(fake_test_json, 'w')) + return fake_test_json + +def main(opt): + folder_path = os.path.join(opt.eval_save_dir, opt.eval_folder) + if opt.eval_mode == 'test': + if not os.path.exists(folder_path): + os.makedirs(folder_path) + logger = create_logger(folder_path, 'val.log') + if opt.eval_model_path: + model_path = opt.eval_model_path + infos_path = os.path.join('/'.join(opt.eval_model_path.split('/')[:-1]), 'info.json') + else: + model_path = os.path.join(folder_path, 'model-best.pth') + infos_path = os.path.join(folder_path, 'info.json') + + logger.info(vars(opt)) + + with open(infos_path, 'rb') as f: + logger.info('load info from {}'.format(infos_path)) + old_opt = json.load(f)['best']['opt'] + + for k, v in old_opt.items(): + if k[:4] != 'eval': + vars(opt).update({k: v}) + + opt.transformer_input_type = opt.eval_transformer_input_type + + if not torch.cuda.is_available(): + opt.nthreads = 0 + # Create the Data Loader instance + + if opt.eval_mode == 'test': + opt.eval_caption_file = create_fake_test_caption_file(opt.test_video_meta_data_csv_path) + opt.visual_feature_folder = opt.test_video_feature_folder + + val_dataset = PropSeqDataset(opt.eval_caption_file, + opt.visual_feature_folder, opt.text_feature_folder, + opt.dict_file, False, opt.eval_proposal_type, + opt) + loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn) + + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = val_dataset.translator + + + + if not os.path.exists(model_path): + raise AssertionError('File {} does not exist'.format(model_path)) + + logger.debug('Loading model from {}'.format(model_path)) + loaded_pth = torch.load(model_path, map_location=opt.eval_device) + epoch = loaded_pth['epoch'] + + # loaded_pth = transfer(model, loaded_pth, model_path+'.transfer.pth') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + + model.to(opt.eval_device) + + if opt.eval_mode == 'test': + out_json_path = os.path.join(folder_path, 'dvc_results.json') + evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=True) + + + else: + out_json_path = os.path.join(folder_path, '{}_epoch{}_num{}_alpha{}.json'.format( + time.strftime("%Y-%m-%d-%H-%M-%S_", time.localtime()) + str(opt.id), epoch, len(loader.dataset), + opt.ec_alpha)) + caption_scores, eval_loss = evaluate(model, criterion, postprocessors, loader, out_json_path, + logger, args=opt, alpha=opt.ec_alpha, dvc_eval_version=opt.eval_tool_version, device=opt.eval_device, debug=False, skip_lang_eval=False) + #
breakpoint() + avg_eval_score = {key: np.array(value).mean() for key, value in caption_scores.items() if key !='tiou'} + # avg_eval_score2 = {key: np.array(value).mean() * 4917 / len(loader.dataset) for key, value in caption_scores.items() if key != 'tiou'} + + # logger.info( + # '\nValidation result based on all 4917 val videos:\n {}\n avg_score:\n{}'.format( + # caption_scores.items(), + # avg_eval_score)) + + logger.info( + '\nValidation result based on {} available val videos:\n avg_score:\n{}'.format(len(loader.dataset), + avg_eval_score)) + + logger.info('saving results json to {}'.format(out_json_path)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--eval_save_dir', type=str, default='save') + parser.add_argument('--eval_mode', type=str, default='eval', choices=['eval', 'test']) + parser.add_argument('--test_video_feature_folder', type=str, nargs='+', default=None) + parser.add_argument('--test_video_meta_data_csv_path', type=str, default=None) + parser.add_argument('--eval_folder', type=str, required=True) + parser.add_argument('--eval_model_path', type=str, default='') + parser.add_argument('--eval_tool_version', type=str, default='2018', choices=['2018', '2021']) + parser.add_argument('--eval_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--eval_proposal_type', type=str, default='gt') + parser.add_argument('--eval_transformer_input_type', type=str, default='queries', choices=['gt_proposals', 'prior_proposals','queries']) + parser.add_argument('--gpu_id', type=str, nargs='+', default=['0']) + parser.add_argument('--eval_device', type=str, default='cuda') + parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all']) + opt = parser.parse_args() + + #breakpoint() + + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + if True: + torch.backends.cudnn.enabled = False + main(opt) diff --git a/yc2_univl/backup/eval_utils.py b/yc2_univl/backup/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd727ecebd0364fe9ad45d94f582fdcb17d54b --- /dev/null +++ b/yc2_univl/backup/eval_utils.py @@ -0,0 +1,241 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import collections +import torch +import numpy as np +import json +from collections import OrderedDict +from tqdm import tqdm +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) + + +from densevid_eval3.eval_soda import eval_soda +from densevid_eval3.eval_para import eval_para +from densevid_eval3.eval_dvc import eval_dvc + +def calculate_avg_proposal_num(json_path): + data = json.load(open(json_path)) + return np.array([len(v) for v in data['results'].values()]).mean() + +def convert_tapjson_to_dvcjson(tap_json, dvc_json): + data = json.load(open(tap_json, 'r')) + data['version'] = "VERSION 1.0" + data['external_data'] = {'used:': True, 'details': "C3D pretrained on Sports-1M"} + + all_names = list(data['results'].keys()) + for video_name in all_names: + for p_info in data['results'][video_name]: + p_info['timestamp'] = p_info.pop('segment') + p_info['proposal_score'] = p_info.pop('score') + p_info['sentence_score'] = p_info.pop('sentence_score', 0) +
data['results']["v_" + video_name] = data['results'].pop(video_name) + json.dump(data, open(dvc_json, 'w')) + + +def convert_dvcjson_to_tapjson(dvc_json, tap_json): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + event_num = len(data[video_name]) + timestamps = [data[video_name][i]['timestamp'] for i in range(event_num)] + sentences = [data[video_name][i]['sentence'] for i in range(event_num)] + for i, timestamp in enumerate(timestamps): + score = data[video_name][i].get('proposal_score', 1.0) + video_info.append({'segment': timestamp, 'score': score, 'sentence': sentences[i], 'sentence_score': data[video_name][i].get('sentence_score', 0)}) + out['results'][video_name[2:]] = video_info + json.dump(out, open(tap_json, 'w')) + + +def convert_gtjson_to_tapjson(gt_json, tap_json): + data = json.load(open(gt_json, 'r')) + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + + all_names = list(data.keys()) + for video_name in all_names: + video_info = [] + timestamps = data[video_name]['timestamps'] + sentences = data[video_name]['sentences'] + for i, timestamp in enumerate(timestamps): + video_info.append({'segment': timestamp, 'score': 1., 'sentence': sentences[i]}) + out['results'][video_name[2:]] = video_info + with open(tap_json, 'w') as f: + json.dump(out, f) + + +def get_topn_from_dvcjson(dvc_json, out_json, top_n=3, ranking_key='proposal_score', score_thres=-1e8): + data = json.load(open(dvc_json, 'r'))['results'] + out = {} + out['version'] = "VERSION 1.0" + out['external_data'] = {'used:': True, 'details': "GT proposals"} + out['results'] = {} + all_names = list(data.keys()) + num = 0 + bad_vid = 0 + for video_name in all_names: + info = data[video_name] + new_info = sorted(info, key=lambda x: x[ranking_key], reverse=True) + new_info = [p for p in new_info if p[ranking_key] > score_thres] + new_info = new_info[:top_n] + out['results'][video_name] = new_info + num += len(new_info) + if len(new_info) == 0: + bad_vid += 1 + out['results'].pop(video_name) + print('average proposal number: {}'.format(num / len(all_names))) + print('number of bad videos: {}'.format(bad_vid)) + print('number of good videos: {}'.format(len(out['results']))) + with open(out_json, 'w') as f: + json.dump(out, f) + + +def eval_metrics(dvc_filename, gt_filenames, para_gt_filenames, alpha=0.3, ranking_key='proposal_score', rerank=False, dvc_eval_version='2018', transformer_input_type='queries'): + score = collections.defaultdict(lambda: -1) + # top_n = 3 + # top_n_filename = dvc_filename + '.top{}.json'.format(top_n) + # get_topn_from_dvcjson(dvc_filename, top_n_filename, top_n=top_n, ranking_key=ranking_key) + # dvc_score = eval_dvc(json_path=top_n_filename, reference=gt_filenames) + # dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + # dvc_score.update(eval_soda(top_n_filename, ref_list=gt_filenames)) + # dvc_score.update(eval_para(top_n_filename, referneces=para_gt_filenames)) + # for key in dvc_score.keys(): + # score[key] = dvc_score[key] + if transformer_input_type == 'prior_proposals': + dvc_score = eval_para(dvc_filename, referneces=para_gt_filenames) + score.update(dvc_score) + #breakpoint() + return score + + else: + if rerank: + dvc_filename = reranking(dvc_filename, alpha=alpha, temperature=2.0) +
dvc_score = eval_dvc(json_path=dvc_filename, reference=gt_filenames, version=dvc_eval_version) + dvc_score = {k: sum(v) / len(v) for k, v in dvc_score.items()} + dvc_score.update(eval_soda(dvc_filename, ref_list=gt_filenames)) + dvc_score.update(eval_para(dvc_filename, referneces=para_gt_filenames)) + score.update(dvc_score) + return score + + +def save_dvc_json(out_json, path): + with open(path, 'w') as f: + out_json['valid_video_num'] = len(out_json['results']) + out_json['avg_proposal_num'] = np.array([len(v) for v in out_json['results'].values()]).mean().item() + json.dump(out_json, f) + +def reranking(p_src, alpha, temperature): + print('alpha: {}, temp: {}'.format(alpha, temperature)) + d = json.load(open(p_src)) + d_items = list(d['results'].items()) + for k,v in d_items: + if True: + sent_scores = [p['sentence_score'] / (float(len(p['sentence'].split()))**(temperature) + 1e-5) for p in v] + prop_score = [p['proposal_score'] for p in v] + joint_score = alpha * (np.array(sent_scores)) + (np.array(prop_score)) + for i,p in enumerate(v): + p['joint_score'] = joint_score[i] + v = sorted(v, key=lambda x: x['joint_score'], reverse=True) + topN = v[0]['pred_event_count'] + v = v[:topN] + v = sorted(v, key=lambda x: x['timestamp']) + d['results'][k] = v + save_path = p_src+'_rerank_alpha{}_temp{}.json'.format(alpha, temperature) + save_dvc_json(d, save_path) + return save_path + + +def evaluate(model, criterion, postprocessors, loader, dvc_json_path, logger=None, args=None, score_threshold=0, + alpha=0.3, dvc_eval_version='2018', device='cuda', debug=False, skip_lang_eval=False): + out_json = {'results': {}, + 'version': "VERSION 1.0", + 'external_data': {'used:': True, 'details': None}} + opt = loader.dataset.opt + + loss_sum = OrderedDict() + with torch.set_grad_enabled(False): + for dt in tqdm(loader, disable=opt.disable_tqdm): + # valid_keys = ["video_tensor", "video_length", "video_mask", "video_key"] + # dt = {key: value for key, value in dt.items() if key in valid_keys} + dt = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt = collections.defaultdict(lambda: None, dt) + + dt['video_target'] = [ + {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # output, loss = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + output, _ = model(dt, criterion, contrastive_criterion=None, eval_mode=True) + orig_target_sizes = dt['video_length'][:, 1] + + weight_dict = criterion.weight_dict + # Huabin comment this line (anything about 'loss') to avoid reporting losses during evaluation + # final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + + # Huabin comment this line to avoid reporting losses during evaluation + # for loss_k, loss_v in loss.items(): + # loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item() + # loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + results = postprocessors['bbox'](output, orig_target_sizes, loader) + + batch_json = {} + for idx, video_name in enumerate(dt['video_key']): + segment = results[idx]['boxes'].cpu().numpy() + raw_boxes = results[idx]['raw_boxes'].cpu().numpy() + # pdb.set_trace() + #breakpoint() + batch_json[video_name] = [ + { + "timestamp": segment[pid].tolist(), + "raw_box": raw_boxes[pid].tolist(), + "proposal_score": results[idx]['scores'][pid].item(), + "sentence": results[idx]['captions'][pid], + "sentence_score": results[idx]['caption_scores'][pid], + 
'query_id': results[idx]['query_id'][pid].item(), + 'vid_duration': results[idx]['vid_duration'].item(), + 'pred_event_count': results[idx]['pred_seq_len'].item(), + } + for pid in range(len(segment)) if results[idx]['scores'][pid].item() > score_threshold] + out_json['results'].update(batch_json) + if debug and len(out_json['results']) > 5: + break + + save_dvc_json(out_json, dvc_json_path) + + if skip_lang_eval: + return None, None + + # Huabin comment this line to avoid reporting losses during evaluation + # for k in loss_sum.keys(): + # loss_sum[k] = np.round(loss_sum[k] / (len(loader) + 1e-5), 3).item() + # logger.info('loss: {}'.format(loss_sum)) + scores = eval_metrics(dvc_json_path, + gt_filenames=opt.gt_file_for_eval, + para_gt_filenames=opt.gt_file_for_para_eval, + alpha=alpha, + rerank=(opt.count_loss_coef > 0), + dvc_eval_version=dvc_eval_version, + transformer_input_type=opt.transformer_input_type + ) + + out_json.update(scores) + save_dvc_json(out_json, dvc_json_path) + # return scores, loss_sum + return scores, [] diff --git a/yc2_univl/backup/misc/MIL_loss.py b/yc2_univl/backup/misc/MIL_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a234e01695ca8871b045a0ba31b13e9e79883a --- /dev/null +++ b/yc2_univl/backup/misc/MIL_loss.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmdet.models.losses import accuracy +from mmdet.models.losses.cross_entropy_loss import _expand_onehot_labels +from .utils import weight_reduce_loss + + +class MILLoss(nn.Module): + + def __init__(self, + # use_binary=True, + # reduction='mean', + binary_ins=False, + loss_weight=1.0, eps=1e-6, loss_type='gfocal_loss'): + """ + Args: + use_binary (bool, optional): Whether the prediction is + used for binary cross-entropy + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + """ + super(MILLoss, self).__init__() + # self.use_binary = use_binary + # self.reduction = reduction + self.loss_weight = loss_weight + # if self.use_sigmoid: + # self.loss_cls = CrossEntropyLoss(use_sigmoid=True, loss_weight=loss_weight) + self.eps = eps + self.loss_type = loss_type + self.binary_ins = binary_ins + + def gfocal_loss(self, p, q, w=1.0): + l1 = (p - q) ** 2 + l2 = q * (p + self.eps).log() + (1 - q) * (1 - p + self.eps).log() + return -(l1 * l2 * w).sum(dim=-1) + + def forward(self, bag_cls_prob, bag_ins_outs, labels, valid, weight=None): + """ + bag_cls_outs: (B, N, C), + bag_ins_outs: (B, N, C*2/C) + valid: (B, N, 1/C) + labels: (B, ) + Returns: + """ + if self.binary_ins: + assert bag_ins_outs.shape[-1] / bag_cls_prob.shape[-1] == 2 + else: + assert bag_ins_outs.shape[-1] == bag_cls_prob.shape[-1] + + B, N, C = bag_cls_prob.shape + prob_cls = bag_cls_prob.unsqueeze(dim=-1) # (B, N, C, 1) + prob_ins = bag_ins_outs.reshape(B, N, C, -1) # (B, N, C, 2/1) + prob_ins = prob_ins.softmax(dim=1) * valid.unsqueeze(dim=-1) + prob_ins = F.normalize(prob_ins, dim=1, p=1) + prob = (prob_cls * prob_ins).sum(dim=1) + acc = accuracy(prob[..., 0], labels) + + label_weights = (valid.sum(dim=1) > 0).float() + labels = _expand_onehot_labels(labels, None, C)[0].float() + num_sample = max(torch.sum(label_weights.sum(dim=-1) > 0).float().item(), 1.)
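        # What follows: with binary_ins, prob carries a (positive, negative)
        # pair per class; the two streams are concatenated along the batch and
        # the negative half is supervised with all-zero labels. Each bag is then
        # scored either with gfocal_loss above, i.e.
        # -(p - q)^2 * (q*log(p) + (1 - q)*log(1 - p)), or with plain BCE, and
        # finally averaged over bags that have at least one valid instance.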
+ + if prob.shape[-1] == 1: + prob = prob.squeeze(dim=-1) + elif prob.shape[-1] == 2: # with binary ins + pos_prob, neg_prob = prob[..., 0], prob[..., 1] + prob = torch.cat([pos_prob, neg_prob]) + neg_labels = labels.new_zeros(labels.shape) + labels = torch.cat([labels, neg_labels]) + label_weights = torch.cat([label_weights, label_weights]) + + if self.loss_type == 'gfocal_loss': + loss = self.gfocal_loss(prob, labels, label_weights) + if weight is not None: + # modified by fei ##############################################################3 + weight=weight.squeeze(-1) + elif self.loss_type == 'binary_cross_entropy': + # if self.use_sigmoid: + # method 1: + # loss = self.loss_cls( + # prob, + # labels, + # label_weights, + # avg_factor=avg_factor, + # reduction_override=reduction_override) + # method 2 + prob = prob.clamp(0, 1) + # modified by fei ##############################################################3 + loss = F.binary_cross_entropy(prob, labels.float(), None, reduction="none") + else: + raise ValueError() + loss = weight_reduce_loss(loss, weight, avg_factor=num_sample) * self.loss_weight + return loss, acc, num_sample \ No newline at end of file diff --git a/yc2_univl/backup/misc/__pycache__/utils.cpython-38.pyc b/yc2_univl/backup/misc/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a07b1b2f7c2819d5dcfeb1e5e462ac236b6d940 Binary files /dev/null and b/yc2_univl/backup/misc/__pycache__/utils.cpython-38.pyc differ diff --git a/yc2_univl/backup/misc/build_vocab.py b/yc2_univl/backup/misc/build_vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..181c9ff27a7edc9d57e16cba107a87857062e24b --- /dev/null +++ b/yc2_univl/backup/misc/build_vocab.py @@ -0,0 +1,66 @@ +# coding:utf-8 +import json + +# file_path_list = ["data/captiondata/train_modified.json", "data/captiondata/val_1.json", "data/captiondata/val_2.json"] +file_path_list = ["data/captiondata/yc2/yc2_train.json", "data/captiondata/yc2/yc2_val.json"] + +count_threshold = 2 # 4 for anet, 2 for youcook2 +# output_path = './data/vocabulary_activitynet.json' +output_path = './data/vocabulary_youcook2.json' + +mark = [',', ':', '!', '_', ';', '-', '.', '?', '/', '"', '\\n', '\\'] + +count_vocal = {} + +for file_path in file_path_list: + data = json.load(open(file_path)) + video_ids = data.keys() + print('video num of ' + file_path.split('/')[-1], len(video_ids)) + for video_id in video_ids: + sentences = data[video_id]["sentences"] + for sentence in sentences: + for m in mark: + if m in sentence: + sentence = sentence.replace(m, " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + sentence = sentence.replace(" ", " ") + + sentence = sentence.lstrip() + sentence = sentence.rstrip() + sentence = sentence.lower() + sentence = sentence.split(" ") + length = len(sentence) + + # print(sentence) + for word in sentence: + # print(type(word)) + for m in word: + if m == ' ': + print('warning !') + word = word.replace(m, '') + if word == '': + print('warning !') + pass + count_vocal[word] = count_vocal.get(word, 0) + 1 + +print("total word:", sum(count_vocal.values())) +count_vocal[''] = 1e10 +count_vocal[''] = 1e10 +vocab = [word for word, n in count_vocal.items() if n >= count_threshold] +bad_word = [word for word, n in count_vocal.items() if n < count_threshold] +bad_count = sum(count_vocal[word] for word in bad_word) + +vocab.append('UNK') +print("number of vocab:", len(vocab)) +print("number of bad word:", len(bad_word)) 
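+# Words below count_threshold are dropped from the vocabulary and counted as
+# UNKs. Note the index maps built next start at 1, leaving index 0 free
+# (presumably reserved for padding).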
+print("number of unks:", bad_count) + +itow = {i + 1: w for i, w in enumerate(vocab)} +wtoi = {w: i + 1 for i, w in enumerate(vocab)} +print(len(itow)) +print(len(wtoi)) + +json.dump({'ix_to_word': itow, + 'word_to_ix': wtoi}, open(output_path, 'w')) +print("saving vocabulary file to {}".format(output_path)) \ No newline at end of file diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6e18c06bca951f4d0ae6bc5e92a08175f68343c Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-37.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4f6c9c6fb2356fb3b50ae9390f74b21203aa9a4 Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/box_ops.cpython-38.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8794fcb4c80bab0af2f4c0acf2e324518d3630a Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-37.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef902352c76a36b2fe11f4a84a4b6186c48b2831 Binary files /dev/null and b/yc2_univl/backup/misc/detr_utils/__pycache__/misc.cpython-38.pyc differ diff --git a/yc2_univl/backup/misc/detr_utils/box_ops.py b/yc2_univl/backup/misc/detr_utils/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7106ba6c48a3cc3827a4bd923b08c7c61213af --- /dev/null +++ b/yc2_univl/backup/misc/detr_utils/box_ops.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Utilities for bounding box manipulation and GIoU. 
+""" +import torch +from torchvision.ops.boxes import box_area + +def box_cl_to_xy(x): + c, l = x.unbind(-1) + b = [c - 0.5 * l, c + 0.5 * l] + return torch.stack(b, dim=-1) + +def box_xy_to_cl(x): + x0, x1 = x.unbind(-1) + b = [(x0 + x1) / 2, (x1 - x0)] + return torch.stack(b, dim=-1) + +# modified from torchvision to also return the union +def box_iou(boxes1, boxes2): + area1 = boxes1[:, 1] - boxes1[:, 0] + area2 = boxes2[:, 1] - boxes2[:, 0] + lt = torch.max(boxes1[:, None, 0], boxes2[:, 0]) # [N,M,2] + rb = torch.min(boxes1[:, None, 1], boxes2[:, 1]) # [N,M,2] + inter = (rb - lt).clamp(min=0) # [N,M,2] + union = area1[:, None] + area2 - inter + iou = inter / (union + 1e-5) + return iou, union + + +def generalized_box_iou(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + + The boxes should be in [x0, y0, x1, y1] format + + Returns a [N, M] pairwise matrix, where N = len(boxes1) + and M = len(boxes2) + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 1:] >= boxes1[:, :1]).all() + assert (boxes2[:, 1:] >= boxes2[:, :1]).all() + iou, union = box_iou(boxes1, boxes2) + lt = torch.min(boxes1[:, None, 0], boxes2[:, 0]) + rb = torch.max(boxes1[:, None, 1], boxes2[:, 1]) + area = (rb - lt).clamp(min=0) # [N,M,2] + giou = iou - (area - union) / (area + 1e-5) + return giou \ No newline at end of file diff --git a/yc2_univl/backup/misc/detr_utils/misc.py b/yc2_univl/backup/misc/detr_utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..168603606353a959ca9cf6a39fbf2d7f9216e560 --- /dev/null +++ b/yc2_univl/backup/misc/detr_utils/misc.py @@ -0,0 +1,989 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +# if float(torchvision.__version__[:3]) < 0.7: +# from torchvision.ops import _new_empty_tensor +# from torchvision.ops.misc import _output_size + + +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. 
+""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.nn as nn +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +if float(torchvision.__version__[:3]) < 0.5: + import math + # from torchvision.ops.misc import _NewEmptyTensorOp + def _check_size_scale_factor(dim, size, scale_factor): + # type: (int, Optional[List[int]], Optional[float]) -> None + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if not (scale_factor is not None and len(scale_factor) != dim): + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + def _output_size(dim, input, size, scale_factor): + # type: (int, Tensor, Optional[List[int]], Optional[float]) -> List[int] + assert dim == 2 + _check_size_scale_factor(dim, size, scale_factor) + if size is not None: + return size + # if dim is not 2 or scale_factor is iterable use _ntuple instead of concat + assert scale_factor is not None and isinstance(scale_factor, (int, float)) + scale_factors = [scale_factor, scale_factor] + # math.floor might return float in py2.7 + return [ + int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + ] +elif float(torchvision.__version__[:3]) < 0.7: + from torchvision.ops import _new_empty_tensor + from torchvision.ops.misc import _output_size + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def get_sha(): + cwd = os.path.dirname(os.path.abspath(__file__)) + + def _run(command): + return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() + sha = 'N/A' + diff = "clean" + branch = 'N/A' + try: + sha = _run(['git', 'rev-parse', 'HEAD']) + subprocess.check_output(['git', 'diff'], cwd=cwd) + diff = _run(['git', 'diff-index', 'HEAD']) + diff = "has uncommited changes" if diff else "clean" + branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) + except Exception: + pass + message = f"sha: {sha}, status: {diff}, branch: {branch}" + return message + + +def collate_fn(batch): + batch = 
list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], :img.shape[2]] = False + else: + raise ValueError('not supported') + return NestedTensor(tensor, mask) + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor], duration=None): + self.tensors = tensors + self.mask = mask + self.duration = duration + + def to(self, device, non_blocking=False): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device, non_blocking=non_blocking) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device, non_blocking=non_blocking) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def record_stream(self, *args, **kwargs): + self.tensors.record_stream(*args, **kwargs) + if self.mask is not None: + self.mask.record_stream(*args, **kwargs) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def get_local_size(): + if not is_dist_avail_and_initialized(): + return 1 + return int(os.environ['LOCAL_SIZE']) + + +def get_local_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return int(os.environ['LOCAL_RANK']) + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + args.dist_url = 'env://' + os.environ['LOCAL_SIZE'] = str(torch.cuda.device_count()) + elif 'SLURM_PROCID' in os.environ: + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = 
torch.cuda.device_count() + addr = subprocess.getoutput( + 'scontrol show hostname {} | head -n1'.format(node_list)) + os.environ['MASTER_PORT'] = os.environ.get('MASTER_PORT', '29500') + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['RANK'] = str(proc_id) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['LOCAL_SIZE'] = str(num_gpus) + args.dist_url = 'env://' + args.world_size = ntasks + args.rank = proc_id + args.gpu = proc_id % num_gpus + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +@torch.no_grad() +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + if target.numel() == 0: + return [torch.zeros([], device=output.device)] + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +# def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): +# # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# """ +# Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# This will eventually be supported natively by PyTorch, and this +# class can go away. +# """ +# if float(torchvision.__version__[:3]) < 0.7: +# if input.numel() > 0: +# return torch.nn.functional.interpolate( +# input, size, scale_factor, mode, align_corners +# ) +# +# output_shape = _output_size(2, input, size, scale_factor) +# output_shape = list(input.shape[:-2]) + list(output_shape) +# if float(torchvision.__version__[:3]) < 0.5: +# return _NewEmptyTensorOp.apply(input, output_shape) +# return _new_empty_tensor(input, output_shape) +# else: +# return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) + + +def get_total_grad_norm(parameters, norm_type=2): + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + device = parameters[0].grad.device + total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), + norm_type) + return total_norm + + +def inverse_sigmoid(x, eps=1e-5): + x = x.clamp(min=0, max=1) + x1 = x.clamp(min=eps) + x2 = (1 - x).clamp(min=eps) + return torch.log(x1/x2) + + + +# class SmoothedValue(object): +# """Track a series of values and provide access to smoothed values over a +# window or the global series average. +# """ +# +# def __init__(self, window_size=20, fmt=None): +# if fmt is None: +# fmt = "{median:.4f} ({global_avg:.4f})" +# self.deque = deque(maxlen=window_size) +# self.total = 0.0 +# self.count = 0 +# self.fmt = fmt +# +# def update(self, value, n=1): +# self.deque.append(value) +# self.count += n +# self.total += value * n +# +# def synchronize_between_processes(self): +# """ +# Warning: does not synchronize the deque! 
+# """ +# if not is_dist_avail_and_initialized(): +# return +# t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') +# dist.barrier() +# dist.all_reduce(t) +# t = t.tolist() +# self.count = int(t[0]) +# self.total = t[1] +# +# @property +# def median(self): +# d = torch.tensor(list(self.deque)) +# return d.median().item() +# +# @property +# def avg(self): +# d = torch.tensor(list(self.deque), dtype=torch.float32) +# return d.mean().item() +# +# @property +# def global_avg(self): +# return self.total / self.count +# +# @property +# def max(self): +# return max(self.deque) +# +# @property +# def value(self): +# return self.deque[-1] +# +# def __str__(self): +# return self.fmt.format( +# median=self.median, +# avg=self.avg, +# global_avg=self.global_avg, +# max=self.max, +# value=self.value) +# +# +# def all_gather(data): +# """ +# Run all_gather on arbitrary picklable data (not necessarily tensors) +# Args: +# data: any picklable object +# Returns: +# list[data]: list of data gathered from each rank +# """ +# world_size = get_world_size() +# if world_size == 1: +# return [data] +# +# # serialized to a Tensor +# buffer = pickle.dumps(data) +# storage = torch.ByteStorage.from_buffer(buffer) +# tensor = torch.ByteTensor(storage).to("cuda") +# +# # obtain Tensor size of each rank +# local_size = torch.tensor([tensor.numel()], device="cuda") +# size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] +# dist.all_gather(size_list, local_size) +# size_list = [int(size.item()) for size in size_list] +# max_size = max(size_list) +# +# # receiving Tensor from all ranks +# # we pad the tensor because torch all_gather does not support +# # gathering tensors of different shapes +# tensor_list = [] +# for _ in size_list: +# tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) +# if local_size != max_size: +# padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") +# tensor = torch.cat((tensor, padding), dim=0) +# dist.all_gather(tensor_list, tensor) +# +# data_list = [] +# for size, tensor in zip(size_list, tensor_list): +# buffer = tensor.cpu().numpy().tobytes()[:size] +# data_list.append(pickle.loads(buffer)) +# +# return data_list +# +# +# def reduce_dict(input_dict, average=True): +# """ +# Args: +# input_dict (dict): all the values will be reduced +# average (bool): whether to do average or sum +# Reduce the values in the dictionary from all processes so that all processes +# have the averaged results. Returns a dict with the same fields as +# input_dict, after reduction. 
+# """ +# world_size = get_world_size() +# if world_size < 2: +# return input_dict +# with torch.no_grad(): +# names = [] +# values = [] +# # sort the keys so that they are consistent across processes +# for k in sorted(input_dict.keys()): +# names.append(k) +# values.append(input_dict[k]) +# values = torch.stack(values, dim=0) +# dist.all_reduce(values) +# if average: +# values /= world_size +# reduced_dict = {k: v for k, v in zip(names, values)} +# return reduced_dict +# +# +# class MetricLogger(object): +# def __init__(self, delimiter="\t"): +# self.meters = defaultdict(SmoothedValue) +# self.delimiter = delimiter +# +# def update(self, **kwargs): +# for k, v in kwargs.items(): +# if isinstance(v, torch.Tensor): +# v = v.item() +# assert isinstance(v, (float, int)) +# self.meters[k].update(v) +# +# def __getattr__(self, attr): +# if attr in self.meters: +# return self.meters[attr] +# if attr in self.__dict__: +# return self.__dict__[attr] +# raise AttributeError("'{}' object has no attribute '{}'".format( +# type(self).__name__, attr)) +# +# def __str__(self): +# loss_str = [] +# for name, meter in self.meters.items(): +# loss_str.append( +# "{}: {}".format(name, str(meter)) +# ) +# return self.delimiter.join(loss_str) +# +# def synchronize_between_processes(self): +# for meter in self.meters.values(): +# meter.synchronize_between_processes() +# +# def add_meter(self, name, meter): +# self.meters[name] = meter +# +# def log_every(self, iterable, print_freq, header=None): +# i = 0 +# if not header: +# header = '' +# start_time = time.time() +# end = time.time() +# iter_time = SmoothedValue(fmt='{avg:.4f}') +# data_time = SmoothedValue(fmt='{avg:.4f}') +# space_fmt = ':' + str(len(str(len(iterable)))) + 'd' +# if torch.cuda.is_available(): +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}', +# 'max mem: {memory:.0f}' +# ]) +# else: +# log_msg = self.delimiter.join([ +# header, +# '[{0' + space_fmt + '}/{1}]', +# 'eta: {eta}', +# '{meters}', +# 'time: {time}', +# 'data: {data}' +# ]) +# MB = 1024.0 * 1024.0 +# for obj in iterable: +# data_time.update(time.time() - end) +# yield obj +# iter_time.update(time.time() - end) +# if i % print_freq == 0 or i == len(iterable) - 1: +# eta_seconds = iter_time.global_avg * (len(iterable) - i) +# eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) +# if torch.cuda.is_available(): +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time), +# memory=torch.cuda.max_memory_allocated() / MB)) +# else: +# print(log_msg.format( +# i, len(iterable), eta=eta_string, +# meters=str(self), +# time=str(iter_time), data=str(data_time))) +# i += 1 +# end = time.time() +# total_time = time.time() - start_time +# total_time_str = str(datetime.timedelta(seconds=int(total_time))) +# print('{} Total time: {} ({:.4f} s / it)'.format( +# header, total_time_str, total_time / len(iterable))) +# +# +# def get_sha(): +# cwd = os.path.dirname(os.path.abspath(__file__)) +# +# def _run(command): +# return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() +# sha = 'N/A' +# diff = "clean" +# branch = 'N/A' +# try: +# sha = _run(['git', 'rev-parse', 'HEAD']) +# subprocess.check_output(['git', 'diff'], cwd=cwd) +# diff = _run(['git', 'diff-index', 'HEAD']) +# diff = "has uncommited changes" if diff else "clean" +# branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) +# except Exception: 
+# pass +# message = f"sha: {sha}, status: {diff}, branch: {branch}" +# return message +# +# # +# # def collate_fn(batch): +# # batch = list(zip(*batch)) +# # batch[0] = nested_tensor_from_tensor_list(batch[0]) +# # return tuple(batch) +# +# +# def _max_by_axis(the_list): +# # type: (List[List[int]]) -> List[int] +# maxes = the_list[0] +# for sublist in the_list[1:]: +# for index, item in enumerate(sublist): +# maxes[index] = max(maxes[index], item) +# return maxes +# +# +# class NestedTensor(object): +# def __init__(self, tensors, mask: Optional[Tensor]): +# self.tensors = tensors +# self.mask = mask +# +# def to(self, device): +# # type: (Device) -> NestedTensor # noqa +# cast_tensor = self.tensors.to(device) +# mask = self.mask +# if mask is not None: +# assert mask is not None +# cast_mask = mask.to(device) +# else: +# cast_mask = None +# return NestedTensor(cast_tensor, cast_mask) +# +# def decompose(self): +# return self.tensors, self.mask +# +# def __repr__(self): +# return str(self.tensors) +# +# # +# # def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): +# # # TODO make this more general +# # if tensor_list[0].ndim == 3: +# # if torchvision._is_tracing(): +# # # nested_tensor_from_tensor_list() does not export well to ONNX +# # # call _onnx_nested_tensor_from_tensor_list() instead +# # return _onnx_nested_tensor_from_tensor_list(tensor_list) +# # +# # # TODO make it support different-sized images +# # max_size = _max_by_axis([list(img.shape) for img in tensor_list]) +# # # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) +# # batch_shape = [len(tensor_list)] + max_size +# # b, c, h, w = batch_shape +# # dtype = tensor_list[0].dtype +# # device = tensor_list[0].device +# # tensor = torch.zeros(batch_shape, dtype=dtype, device=device) +# # mask = torch.ones((b, h, w), dtype=torch.bool, device=device) +# # for img, pad_img, m in zip(tensor_list, tensor, mask): +# # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # m[: img.shape[1], :img.shape[2]] = False +# # else: +# # raise ValueError('not supported') +# # return NestedTensor(tensor, mask) +# +# +# # _onnx_nested_tensor_from_tensor_list() is an implementation of +# # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
+# # @torch.jit.unused +# # def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: +# # max_size = [] +# # for i in range(tensor_list[0].dim()): +# # max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) +# # max_size.append(max_size_i) +# # max_size = tuple(max_size) +# # +# # # work around for +# # # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) +# # # m[: img.shape[1], :img.shape[2]] = False +# # # which is not yet supported in onnx +# # padded_imgs = [] +# # padded_masks = [] +# # for img in tensor_list: +# # padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] +# # padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) +# # padded_imgs.append(padded_img) +# # +# # m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) +# # padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) +# # padded_masks.append(padded_mask.to(torch.bool)) +# # +# # tensor = torch.stack(padded_imgs) +# # mask = torch.stack(padded_masks) +# # +# # return NestedTensor(tensor, mask=mask) +# +# +# def setup_for_distributed(is_master): +# """ +# This function disables printing when not in master process +# """ +# import builtins as __builtin__ +# builtin_print = __builtin__.print +# +# def print(*args, **kwargs): +# force = kwargs.pop('force', False) +# if is_master or force: +# builtin_print(*args, **kwargs) +# +# __builtin__.print = print +# +# +# def is_dist_avail_and_initialized(): +# if not dist.is_available(): +# return False +# if not dist.is_initialized(): +# return False +# return True +# +# +# def get_world_size(): +# if not is_dist_avail_and_initialized(): +# return 1 +# return dist.get_world_size() +# +# +# def get_rank(): +# if not is_dist_avail_and_initialized(): +# return 0 +# return dist.get_rank() +# +# +# def is_main_process(): +# return get_rank() == 0 +# +# +# def save_on_master(*args, **kwargs): +# if is_main_process(): +# torch.save(*args, **kwargs) +# +# +# def init_distributed_mode(args): +# if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: +# args.rank = int(os.environ["RANK"]) +# args.world_size = int(os.environ['WORLD_SIZE']) +# args.gpu = int(os.environ['LOCAL_RANK']) +# elif 'SLURM_PROCID' in os.environ: +# args.rank = int(os.environ['SLURM_PROCID']) +# args.gpu = args.rank % torch.cuda.device_count() +# else: +# print('Not using distributed mode') +# args.distributed = False +# return +# +# args.distributed = True +# +# torch.cuda.set_device(args.gpu) +# args.dist_backend = 'nccl' +# print('| distributed init (rank {}): {}'.format( +# args.rank, args.dist_url), flush=True) +# torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, +# world_size=args.world_size, rank=args.rank) +# torch.distributed.barrier() +# setup_for_distributed(args.rank == 0) +# +# +# @torch.no_grad() +# def accuracy(output, target, topk=(1,)): +# """Computes the precision@k for the specified values of k""" +# if target.numel() == 0: +# return [torch.zeros([], device=output.device)] +# maxk = max(topk) +# batch_size = target.size(0) +# +# _, pred = output.topk(maxk, 1, True, True) +# pred = pred.t() +# correct = pred.eq(target.view(1, -1).expand_as(pred)) +# +# res = [] +# for k in topk: +# correct_k = correct[:k].view(-1).float().sum(0) +# res.append(correct_k.mul_(100.0 / batch_size)) +# return res +# +# +# # def interpolate(input, size=None, scale_factor=None, mode="nearest", 
align_corners=None): +# # # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor +# # """ +# # Equivalent to nn.functional.interpolate, but with support for empty batch sizes. +# # This will eventually be supported natively by PyTorch, and this +# # class can go away. +# # """ +# # if float(torchvision.__version__[:3]) < 0.7: +# # if input.numel() > 0: +# # return torch.nn.functional.interpolate( +# # input, size, scale_factor, mode, align_corners +# # ) +# # +# # output_shape = _output_size(2, input, size, scale_factor) +# # output_shape = list(input.shape[:-2]) + list(output_shape) +# # return _new_empty_tensor(input, output_shape) +# # else: +# # return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) diff --git a/yc2_univl/backup/misc/utils.py b/yc2_univl/backup/misc/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..301a498189d0568ce14362b3630f2c89c2a26c6e --- /dev/null +++ b/yc2_univl/backup/misc/utils.py @@ -0,0 +1,357 @@ +# coding:utf-8 +# from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import torch +import numpy as np +import glob +import shutil +import os +import colorlog +import random +import six +from six.moves import cPickle +import matplotlib as mpl + +mpl.use('Agg') +import matplotlib.pyplot as plt + + +def match_name_keywords(n, name_keywords): + out = False + for b in name_keywords: + if b in n: + out = True + break + return out + + +def decide_two_stage(transformer_input_type, dt, criterion): + if transformer_input_type == 'gt_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = dt['gt_boxes_mask'] + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = True + elif transformer_input_type == 'prior_proposals': + two_stage = True + proposals = dt['gt_boxes'] + proposals_mask = None + criterion.matcher.cost_caption = 0 + for q_k in ['loss_length', 'loss_ce', 'loss_bbox', 'loss_giou']: + for key in criterion.weight_dict.keys(): + if q_k in key: + criterion.weight_dict[key] = 0 + disable_iterative_refine = False + elif transformer_input_type == 'queries': # + two_stage = False + proposals = None + proposals_mask = None + disable_iterative_refine = False + else: + raise ValueError('Wrong value of transformer_input_type, got {}'.format(transformer_input_type)) + return two_stage, disable_iterative_refine, proposals, proposals_mask + + +def pickle_load(f): + """ Load a pickle. + Parameters + ---------- + f: file-like object + """ + if six.PY3: + return cPickle.load(f, encoding='latin-1') + else: + return cPickle.load(f) + + +def pickle_dump(obj, f): + """ Dump a pickle. + Parameters + ---------- + obj: pickled object + f: file-like object + """ + if six.PY3: + return cPickle.dump(obj, f, protocol=2) + else: + return cPickle.dump(obj, f) + + +def set_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # grid_sampler_2d_backward_cuda does not have a deterministic implementation. 
try set torch.use_deterministic_algorithms(True, warn_only=True) to see the non-deterministic operation + # torch.use_deterministic_algorithms(True, warn_only=True) + + +def update_values(dict_from, dict_to): + for key, value in dict_from.items(): + if key not in dict_to.keys(): + raise AssertionError('key mismatching: {}'.format(key)) + if isinstance(value, dict): + update_values(dict_from[key], dict_to[key]) + elif value is not None: + dict_to[key] = dict_from[key] + + +def print_opt(opt, model, logger): + print_alert_message('All args:', logger) + for key, item in opt._get_kwargs(): + logger.info('{} = {}'.format(key, item)) + print_alert_message('Model structure:', logger) + logger.info(model) + + +def build_folder_name(opt): + # The dataset + # breakpoint() + if len(opt.visual_feature_folder) == 2: + if ('youcook2' in opt.visual_feature_folder[1]) or ('yc2' in opt.visual_feature_folder[1]): + dataset_name = 'howto-yc2_yc2' + elif ('Tasty' in opt.visual_feature_folder[1]) or ('tasty' in opt.visual_feature_folder[1]): + dataset_name = 'howto-tasty_tasty' + elif ('anet' in opt.visual_feature_folder[1]) or ('Anet' in opt.visual_feature_folder[1]): + dataset_name = 'howto-anet_anet' + # elif ('vlep' in opt.visual_feature_folder[1]) or ('Vlep' in opt.visual_feature_folder[1]): + # dataset_name = 'howto-vlep_vlep' + else: + raise ValueError('Wrong dataset name') + + if 'vlep' in opt.visual_feature_folder[0] or 'Vlep' in opt.visual_feature_folder[0]: + dataset_name = dataset_name.replace('howto', 'vlep') + else: + if ('youcook2' in opt.visual_feature_folder[0]) or ('yc2' in opt.visual_feature_folder[0]): + dataset_name = 'yc2' + elif ('Anet' in opt.visual_feature_folder[0]) or ('anet' in opt.visual_feature_folder[0]): + dataset_name = 'anet' + elif ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + dataset_name = 'tasty' + elif ('Howto' in opt.visual_feature_folder[0]) or ('howto' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'howto_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'howto_anet' + elif ('vlep' in opt.visual_feature_folder[0]) or ('Vlep' in opt.visual_feature_folder[0]): + if ('yc2' in opt.visual_feature_folder_val[0]) or ('youcook2' in opt.visual_feature_folder_val[0]): + dataset_name = 'vlep_yc2' + elif 'tasty' in opt.visual_feature_folder_val[0] or 'Tasty' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_tasty' + elif 'anet' in opt.visual_feature_folder_val[0] or 'Anet' in opt.visual_feature_folder_val[0]: + dataset_name = 'vlep_anet' + else: + raise ValueError('Wrong dataset name') + if 'tasty_14' in opt.dict_file: + dataset_name += '_voc14' + + # The code base + if opt.use_anchor: + use_anchor = 'anc' # Means learnable anchor is used + else: + use_anchor = 'ori' # Means original anchor in pdvc is used + + # The state of using pseudo boxes + if opt.use_pseudo_box: + use_pseudo = 'pbox' + if opt.pseudo_box_type == 'similarity': + use_pseudo += '(sim)' + else: + use_pseudo += '({})'.format(opt.pseudo_box_type) + else: + use_pseudo = 'GT' + + # The viusal-text model used + if opt.pretrained_language_model == 'CLIP-ViP': + text_model = 'ViP' + elif opt.pretrained_language_model == 'UniVL': + text_model = 'Uni' + else: + 
text_model = opt.pretrained_language_model + + format_folder_name = '_'.join([dataset_name, use_anchor, use_pseudo, text_model]) + + + + return format_folder_name + +def build_folder(opt): + # breakpoint() + if opt.start_from: + print('Start training from id:{}'.format(opt.start_from)) + save_folder = os.path.join(opt.save_dir, opt.start_from) + assert os.path.exists(save_folder) and os.path.isdir(save_folder), 'Wrong start_from path: {}'.format(save_folder) + else: + if not os.path.exists(opt.save_dir): + os.mkdir(opt.save_dir) + format_folder_name = build_folder_name(opt) + # breakpoint() + save_foldername = '' + if opt.use_pseudo_box: + if opt.pseudo_box_type != 'align': + if opt.pseudo_box_type == 'similarity_op' or opt.pseudo_box_type == 'similarity_op_order': + save_foldername = '{}_topf{}_beta{}_iter{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.beta, opt.iteration, opt.width_ratio) + elif opt.pseudo_box_type == 'similarity_op_order_v2': + save_foldername = '{}_topf{}_iter{}_r{}_th{}'.format(opt.pseudo_box_type, opt.top_frames, opt.iteration, opt.width_ratio, opt.width_th) + else: + save_foldername = '{}_topf{}_w{}_{}_r{}'.format(opt.pseudo_box_type, opt.top_frames, opt.window_size, opt.statistic_mode, opt.width_ratio) + else: + save_folder = 'align' + else: + save_foldername = 'gtbox' + + if opt.refine_pseudo_box: + save_foldername += '_refine_aug({},{})_top{}_{}stage'.format(opt.pseudo_box_aug_num, \ + opt.pseudo_box_aug_ratio, \ + opt.merge_k_boxes, \ + opt.refine_pseudo_stage_num) + if opt.pseudo_box_aug_mode == 'uniform': + save_foldername += '_uniform' + elif opt.pseudo_box_aug_mode == 'random_new': + save_foldername += '_random_new' + save_foldername += ('_' + opt.merge_criterion) + if opt.merge_mode == 'interpolate': + save_foldername += '_interpolate' + if opt.use_neg_pseudo_box: + save_foldername += '_{}neg'.format(opt.num_neg_box) + if opt.mil_loss_coef != 1.0: + save_foldername += '_mil_coef{}'.format(str(opt.mil_loss_coef)) + if opt.weighted_mil_loss: + save_foldername += '_wMIL' + if not opt.focal_mil: + save_foldername += '_noFocal' + if opt.disable_rematch: + save_foldername += '_nomatch' + if opt.use_additional_score_layer: + save_foldername += '_S-layer' + if opt.use_additional_cap_layer: + save_foldername += '_C-layer' + if 'puyu' in opt.train_caption_file[0]: + save_foldername += '_puyu' + elif 'mixlm' in opt.train_caption_file[0]: + save_foldername += '_mixlm' + + if opt.id != '': + save_foldername += '_{}'.format(opt.id) + # breakpoint() + # basefilename = os.path.basename(opt.cfg_path) + # basefilename = os.path.splitext(basefilename)[0] + save_folder = os.path.join(opt.save_dir, format_folder_name) + save_folder = os.path.join(save_folder, save_foldername) + if os.path.exists(save_folder): + print('Results folder "{}" already exists, renaming it...'.format(save_folder)) + i = 1 + while 1: + new_save_folder = save_folder + '_{}'.format(i) + if not os.path.exists(new_save_folder): + save_folder = new_save_folder + break + i += 1 + # wait_flag = input('Warning! Path {} already exists, rename it? (Y/N) : '.format(save_folder)) + # if wait_flag in ['Y', 'y']: + # # opt.id = opt.id + '_{}'.format(time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) + # # save_folder = os.path.join(opt.save_dir, opt.id) + # # print('Rename opt.id as "{}".'.format(opt.id)) + # new_name = input('the new name to be appended :') + # save_folder = save_folder + '_' + new_name + # # elif wait_flag in ['N', 'n']: + # # wait_flag_new = input('Are you sure re-write this folder:{}? 
(Y/N): '.format(save_folder)) + # # if wait_flag_new in ['Y', 'y']: + # # return save_folder + # # else: + # # raise AssertionError('Folder {} already exists'.format(save_folder)) + # else: + # raise AssertionError('Folder {} already exists'.format(save_folder)) + print('Results folder "{}" does not exist, creating folder...'.format(save_folder)) + os.makedirs(save_folder) + os.makedirs(os.path.join(save_folder, 'prediction')) + return save_folder + + +def backup_envir(save_folder, opt): + cfg_path = opt.cfg_path + dir_path = os.path.dirname(cfg_path) + backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc'] + if dir_path not in backup_folders: + backup_folders.append(dir_path) + + backup_files = glob.glob('./*.py') + for folder in backup_folders: + shutil.copytree(folder, os.path.join(save_folder, 'backup', folder)) + for file in backup_files: + shutil.copyfile(file, os.path.join(save_folder, 'backup', file)) + + +def create_logger(folder, filename): + log_colors = { + 'DEBUG': 'blue', + 'INFO': 'white', + 'WARNING': 'green', + 'ERROR': 'red', + 'CRITICAL': 'yellow', + } + + import logging + logger = logging.getLogger('DVC') + # %(filename)s$RESET:%(lineno)d + # LOGFORMAT = "%(log_color)s%(asctime)s [%(log_color)s%(filename)s:%(lineno)d] | %(log_color)s%(message)s%(reset)s |" + LOGFORMAT = "" + LOG_LEVEL = logging.DEBUG + logging.root.setLevel(LOG_LEVEL) + stream = logging.StreamHandler() + stream.setLevel(LOG_LEVEL) + stream.setFormatter(colorlog.ColoredFormatter(LOGFORMAT, datefmt='%d %H:%M', log_colors=log_colors)) + + # print to log file + hdlr = logging.FileHandler(os.path.join(folder, filename)) + hdlr.setLevel(LOG_LEVEL) + # hdlr.setFormatter(logging.Formatter("[%(asctime)s] %(message)s")) + hdlr.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(hdlr) + logger.addHandler(stream) + return logger + + +def print_alert_message(str, logger=None): + msg = '*' * 20 + ' ' + str + ' ' + '*' * (58 - len(str)) + if logger: + logger.info('\n\n' + msg) + else: + print(msg) + + +def set_lr(optimizer, lr): + for group in optimizer.param_groups: + group['lr'] = lr + + +def clip_gradient(optimizer, grad_clip): + for group in optimizer.param_groups: + for i, param in enumerate(group['params']): + if param.grad is not None: + param.grad.data.clamp_(-grad_clip, grad_clip) + + +if __name__ == '__main__': + # import opts + # + # info = {'opt': vars(opts.parse_opts()), + # 'loss': {'tap_loss': 0, 'tap_reg_loss': 0, 'tap_conf_loss': 0, 'lm_loss': 0}} + # record_this_run_to_csv(info, 'save/results_all_runs.csv') + + logger = create_logger('./', 'mylogger.log') + logger.info('debug') + logger.info('test2') diff --git a/yc2_univl/backup/opts.py b/yc2_univl/backup/opts.py new file mode 100644 index 0000000000000000000000000000000000000000..8c0abaea05f6aefca9779237b1d3c555f10e45ec --- /dev/null +++ b/yc2_univl/backup/opts.py @@ -0,0 +1,312 @@ +import argparse +import time +import yaml +import os +import numpy as np + +def parse_opts(): + parser = argparse.ArgumentParser() + + # configure of this run + parser.add_argument('--cfg_path', type=str, required=True, help='config file') + parser.add_argument('--id', type=str, default='', help='id of this run. 
Results and logs will saved in this folder ./save/id') + parser.add_argument('--gpu_id', type=str, nargs='+', default=[]) + parser.add_argument('--disable_tqdm', action='store_true') + parser.add_argument('--seed', type=int, default=777) + parser.add_argument('--random_seed', action='store_true', help='choose a random seed from {1,...,1000}') + parser.add_argument('--disable_cudnn', type=int, default=0, help='disable cudnn may solve some unknown bugs') + parser.add_argument('--debug', action='store_true', help='using mini-dataset for fast debugging') + parser.add_argument('--device', default='cuda', choices=['cpu', 'cuda'], help='device to use for training / testing') + parser.add_argument('--map', action='store_true', default=False, help='map a100 data path to 3090 data path') + # parser.add_argument('--extra_id', type=str, default='', help='extra config for listed in the folder name') + + # ***************************** INPUT DATA PATH ***************************** + parser.add_argument('--train_caption_file', type=str, + default='data/anet/captiondata/train_modified.json', help='') + parser.add_argument('--invalid_video_json', type=str, nargs='+', default=[]) + parser.add_argument('--val_caption_file', type=str, default='data/anet/captiondata/val_1.json') + parser.add_argument('--visual_feature_folder', type=str, default='data/anet/resnet_bn') + parser.add_argument('--text_feature_folder', type=str, default=None) + parser.add_argument('--gt_file_for_auc', type=str, nargs='+', default='data/anet/captiondata/val_all.json') + parser.add_argument('--gt_file_for_eval', type=str, nargs='+', default=['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json']) + parser.add_argument('--gt_file_for_para_eval', type=str, nargs='+', default= ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json']) + parser.add_argument('--dict_file', type=str, default='data/anet/vocabulary_activitynet.json', help='') + parser.add_argument('--criteria_for_best_ckpt', type=str, default='overall', choices=['dvc', 'pc', 'overall'], help='for dense video captioning, use soda_c + METEOR as the criteria' + 'for paragraph captioning, choose the best para_METEOR+para_CIDEr+para_BLEU4' + 'for overall, select BLEU4 + METEOR + F1_score') + + parser.add_argument('--visual_feature_type', type=str, default='c3d', choices=['c3d', 'resnet_bn', 'resnet', 'UniVL', 'CLIP', 'CLIP-ViP']) + parser.add_argument('--feature_dim', type=int, default=500, help='dim of frame-level feature vector') + + parser.add_argument('--start_from', type=str, default='', help='id of the run with incompleted training') + parser.add_argument('--start_from_mode', type=str, choices=['best', 'last'], default="last") + parser.add_argument('--pretrain', type=str, choices=['full', 'encoder', 'decoder']) + parser.add_argument('--pretrain_path', type=str, default='', help='path of .pth') + + # ***************************** DATALOADER OPTION ***************************** + parser.add_argument('--nthreads', type=int, default=4) + parser.add_argument('--data_norm', type=int, default=0) + parser.add_argument('--data_rescale', type=int, default=1) + + parser.add_argument('--feature_sample_rate', type=int, default=1) + parser.add_argument('--train_proposal_sample_num', type=int, + default=24, + help='number of sampled proposals (or proposal sequence), a bigger value may be better') + parser.add_argument('--gt_proposal_sample_num', type=int, default=30) + parser.add_argument('--ft_gt_percent', 
type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + parser.add_argument('--pre_percent', type=float, default=1.0, help='the percentage of gt samples used in pbox+gt setting. 1.0 means using all gt samples in yc2/tasty.') + + + # ***************************** Caption Decoder ***************************** + parser.add_argument('--vocab_size', type=int, default=5747) + parser.add_argument('--wordRNN_input_feats_type', type=str, default='C', choices=['C', 'E', 'C+E'], + help='C:clip-level features, E: event-level features, C+E: both') + parser.add_argument('--caption_decoder_type', type=str, default="light", + choices=['none','light', 'standard']) + parser.add_argument('--rnn_size', type=int, default=512, + help='size of the rnn in number of hidden nodes in each layer') + parser.add_argument('--num_layers', type=int, default=1, help='number of layers in the RNN') + parser.add_argument('--input_encoding_size', type=int, default=512, + help='the encoding size of each token in the vocabulary') + parser.add_argument('--att_hid_size', type=int, default=512, help='the hidden size of the attention MLP') + parser.add_argument('--drop_prob', type=float, default=0.5, help='strength of dropout in the Language Model RNN') + parser.add_argument('--max_caption_len', type=int, default=30, help='') + + # ***************************** Transformer ***************************** + parser.add_argument('--hidden_dim', type=int, default=512) + parser.add_argument('--num_queries', type=int, default=100) + parser.add_argument('--hidden_dropout_prob', type=float, default=0.5) + parser.add_argument('--layer_norm_eps', type=float, default=1e-12) + parser.add_argument('--caption_cost_type', type=str, default='loss') + parser.add_argument('--set_cost_caption', type=float, default=0) + parser.add_argument('--set_cost_class', type=float, default=1) + parser.add_argument('--set_cost_bbox', type=float, default=5) + parser.add_argument('--set_cost_giou', type=float, default=2) + parser.add_argument('--cost_alpha', type=float, default=0.25) + parser.add_argument('--cost_gamma', type=float, default=2) + + parser.add_argument('--bbox_loss_coef', default=5, type=float) + parser.add_argument('--giou_loss_coef', default=2, type=float) + parser.add_argument('--count_loss_coef', default=0, type=float) + parser.add_argument('--caption_loss_coef', default=0, type=float) + parser.add_argument('--eos_coef', default=0.1, type=float, + help="Relative classification weight of the no-object class") + parser.add_argument('--num_classes', type=int, default=1) + parser.add_argument('--dec_layers', type=int, default=6) + parser.add_argument('--enc_layers', type=int, default=6) + parser.add_argument('--transformer_ff_dim', type=int, default=2048) + parser.add_argument('--transformer_dropout_prob', type=float, default=0.1) + parser.add_argument('--frame_embedding_num', type=int, default = 100) + parser.add_argument('--sample_method', type=str, default = 'nearest', choices=['nearest', 'linear']) + parser.add_argument('--fix_xcw', type=int, default=0) + + # ***************************** Learnable anchor ***************************** + parser.add_argument('--use_anchor', default=False, action='store_true') + parser.add_argument('--random_anchor_init', default=True, action='store_false') + parser.add_argument('--prior_anchor_duration_init', default=True, action='store_false') + + # ***************************** Text-query alignment ***************************** + 
+    parser.add_argument('--matcher_type', type=str, default='default', choices=['default', 'DTW', 'Sim'])
+    # === For Text encoder ===
+    parser.add_argument('--pretrained_language_model', type=str, default='UniVL', \
+                        choices=['UniVL', 'CLIP', 'CLIP-ViP'], help='Pretrained Hugging Face model')
+    parser.add_argument('--text_hidden_dim', type=int, default=768, help='hidden dim of text encoder')
+    parser.add_argument('--max_text_input_len', type=int, default=32, help='')
+    parser.add_argument('--max_pos_num', type=int, default=500)
+    parser.add_argument('--huggingface_cache_dir', type=str, default='.cache')
+    parser.add_argument('--text_encoder_learning_strategy', type=str, default='frozen', choices=('frozen',))
+
+    # === For generate_pseudo_bbox ===
+    parser.add_argument('--use_pseudo_box', default=False, action='store_true')
+    parser.add_argument('--pseudo_box_type', type=str, default='similarity', choices=['align', 'similarity', 'weight_sim', 'weight_index', 'modeframe'])
+
+    # 1) For different ways of generating pseudo box
+    parser.add_argument('--top_frames', type=int, default=15)
+    parser.add_argument('--window_size', type=int, default=2)
+    parser.add_argument('--statistic_mode', type=str, default='median', choices=['mode', 'median'])
+    parser.add_argument('--width_ratio', type=float, default=-1)
+    parser.add_argument('--beta', type=float, default=1, help="weight for overlap loss")
+    parser.add_argument('--width_th', type=float, default=0.5, help="threshold for width")
+    parser.add_argument('--iteration', type=int, default=3, help="iteration for pseudo box generation")
+    # 2) For box refinement
+    parser.add_argument('--pseudo_box_aug', default=False, action='store_true')
+    parser.add_argument('--pseudo_box_aug_num', type=int, default=5)
+    parser.add_argument('--pseudo_box_aug_ratio', type=float, default=0.1)
+    parser.add_argument('--pseudo_box_aug_mode', default='random', choices=['random', 'uniform'])
+    parser.add_argument('--refine_pseudo_box', default=False, action='store_true')
+    parser.add_argument('--use_additional_score_layer', default=False, action='store_true')
+    parser.add_argument('--use_additional_cap_layer', default=False, action='store_true')
+    parser.add_argument('--merge_k_boxes', type=int, default=3)
+    parser.add_argument('--merge_criterion', type=str, choices=['cap_topk', 'ins_topk', 'ins_cap_topk'], default='cap_topk')
+    parser.add_argument('--merge_mode', type=str, choices=['weighted_sum', 'interpolate'], default='weighted_sum')
+    parser.add_argument('--refine_pseudo_stage_num', type=int, default=2)
+    parser.add_argument('--use_query_box_for_refine', default=False, action='store_true')
+    parser.add_argument('--norm_ins_score', default='sigmoid', choices=['sigmoid', 'softmax'])
+    parser.add_argument('--cap_prob_clip', default=False, action='store_true')
+    parser.add_argument('--use_neg_pseudo_box', default=False, action='store_true')
+    parser.add_argument('--num_neg_box', default=10, type=int)
+    parser.add_argument('--weighted_mil_loss', default=False, action='store_true')
+    parser.add_argument('--focal_mil', default=False, action='store_true')
+    parser.add_argument('--disable_rematch', default=False, action='store_true')
+    parser.add_argument('--start_refine_epoch', default=-1, type=int)
+
+
+    # === For DTW ===
+    parser.add_argument('--align_keep_percentile', type=float, default=0.1)
+    parser.add_argument('--align_top_band_size', type=int, default=0)
+    parser.add_argument('--align_drop_z', type=int, default=0)
+    parser.add_argument('--align_one_to_many', default=False,
+    parser.add_argument('--align_many_to_one', default=False, action='store_true')
+    parser.add_argument('--align_contiguous', default=False, action='store_true')
+
+    # === For Sim matcher ===
+    parser.add_argument('--set_cost_sim', type=float, default=1.0)
+
+    # === For contrastive ===
+    parser.add_argument('--enable_contrastive', default=False, action='store_true', help='enable contrastive learning')
+    parser.add_argument('--disable_contrastive_projection', default=False, action='store_true', help='disable contrastive projection layers')
+    parser.add_argument('--contrastive_hidden_size', type=int, default=128, help='Contrastive hidden size')
+    parser.add_argument('--contrastive_loss_start_coef', type=float, default=0.1, help='Weight of contrastive loss')
+    parser.add_argument('--contrastive_loss_temperature', type=float, default=0.1, help='Temperature of the contrastive loss')
+    # note: argparse's type=bool is truthy for any non-empty string, so only an empty
+    # string on the command line yields False here
+    parser.add_argument('--enable_cross_video_cl', type=bool, default=True, help='Enable cross video contrastive loss')
+    # note: with action='store_true' and default=True, the two flags below cannot be
+    # disabled from the command line, only via a config file
+    parser.add_argument('--enable_e2t_cl', default=True, action='store_true', help='enable event-to-text contrastive loss')
+    parser.add_argument('--enable_bg_for_cl', default=True, action='store_true', help='add a class for background events')
+    parser.add_argument('--set_cost_cl', type=float, default=0.0)
+    parser.add_argument('--cl_schedule_val', type=float, nargs='+', default=[0, 0.1])
+    parser.add_argument('--cl_schedule_time', type=int, nargs='+', default=[0, 2])
+
+
+
+    # ***************************** Prior *****************************
+    parser.add_argument('--prior_manner', type=str, default='all', choices=['add', 'all'])
+
+    # ***************************** OPTIMIZER *****************************
+    parser.add_argument('--training_scheme', type=str, default='all', choices=['cap_head_only', 'no_cap_head', 'all'])
+    parser.add_argument('--epoch', type=int, default=25)
+    parser.add_argument('--batch_size', type=int, default=1, help='batch size for training')
+    parser.add_argument('--batch_size_for_eval', type=int, default=1, help='batch size for evaluation')
+    parser.add_argument('--grad_clip', type=float, default=100., help='clip gradients at this value')
+    parser.add_argument('--optimizer_type', type=str, default='adam')
+    parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay')
+
+    parser.add_argument('--lr', type=float, default=1e-4, help='1e-4 for resnet feature and 5e-5 for C3D feature')
+    parser.add_argument('--learning_rate_decay_start', type=float, default=8)
+    parser.add_argument('--learning_rate_decay_every', type=float, default=3)
+    parser.add_argument('--learning_rate_decay_rate', type=float, default=0.5)
+
+    # ***************************** SAVING AND LOGGING *****************************
+    parser.add_argument('--min_epoch_when_save', type=int, default=-1)
+    parser.add_argument('--save_checkpoint_every', type=int, default=1)
+    parser.add_argument('--save_all_checkpoint', action='store_true')
+    parser.add_argument('--save_dir', type=str, default='/mnt/data/pjlab-3090-sport/wuhao/logs/dibs', help='directory to store checkpointed models')
+
+    # ***************************** For Deformable DETR *************************************
+    parser.add_argument('--lr_backbone_names', default=["None"], type=str, nargs='+')
+    parser.add_argument('--lr_backbone', default=2e-5, type=float)
+    parser.add_argument('--lr_proj', default=0, type=int)
+    parser.add_argument('--lr_linear_proj_names', default=['reference_points', 'sampling_offsets'], type=str, nargs='+')
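+    # Illustrative sketch (an assumption -- the training script is not part of this
+    # diff): --lr_linear_proj_names / --lr_linear_proj_mult are typically consumed
+    # when building optimizer parameter groups, roughly as in Deformable DETR:
+    #
+    #   def match_name_keywords(name, keywords):
+    #       return any(k in name for k in keywords)
+    #
+    #   param_groups = [
+    #       {'params': [p for n, p in model.named_parameters()
+    #                   if not match_name_keywords(n, args.lr_linear_proj_names)],
+    #        'lr': args.lr},
+    #       {'params': [p for n, p in model.named_parameters()
+    #                   if match_name_keywords(n, args.lr_linear_proj_names)],
+    #        'lr': args.lr * args.lr_linear_proj_mult},
+    #   ]
+    #   optimizer = torch.optim.AdamW(param_groups, lr=args.lr, weight_decay=args.weight_decay)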
parser.add_argument('--lr_linear_proj_mult', default=0.1, type=float)
+
+    # Variants of Deformable DETR
+    parser.add_argument('--with_box_refine', default=False, action='store_true')
+    parser.add_argument('--transformer_input_type', default='queries', choices=['gt_proposals', 'prior_proposals', 'learnt_proposals', 'queries'])
+
+    # * Backbone
+    parser.add_argument('--backbone', default=None, type=str,
+                        help="Name of the convolutional backbone to use")
+    parser.add_argument('--dilation', action='store_true',
+                        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
+    parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
+                        help="Type of positional embedding to use on top of the image features")
+    parser.add_argument('--position_embedding_scale', default=2 * np.pi, type=float,
+                        help="position / size * scale")
+    parser.add_argument('--num_feature_levels', default=4, type=int, help='number of feature levels')
+
+    # * Transformer
+
+    parser.add_argument('--nheads', default=8, type=int,
+                        help="Number of attention heads inside the transformer's attentions")
+    parser.add_argument('--dec_n_points', default=4, type=int)
+    parser.add_argument('--enc_n_points', default=4, type=int)
+
+    parser.add_argument('--share_caption_head', type=int, default=1)
+
+    parser.add_argument('--cap_nheads', default=8, type=int)
+    parser.add_argument('--cap_dec_n_points', default=4, type=int)
+    parser.add_argument('--cap_num_feature_levels', default=4, type=int)
+    parser.add_argument('--disable_mid_caption_heads', action='store_true')
+
+    # Loss
+    parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
+                        help="Disables auxiliary decoding losses (loss at each layer)")
+
+
+    # * Loss coefficients
+
+    parser.add_argument('--cls_loss_coef', default=2, type=float)
+    parser.add_argument('--self_iou_loss_coef', default=0.0, type=float)
+    parser.add_argument('--ref_rank_loss_coef', default=0.1, type=float)
+    parser.add_argument('--mil_loss_coef', default=1.0, type=float)
+    parser.add_argument('--focal_alpha', default=0.25, type=float)
+    parser.add_argument('--focal_gamma', default=2., type=float)
+
+
+    # ***************************** Event counter *****************************
+    parser.add_argument('--max_eseq_length', default=10, type=int)
+    parser.add_argument('--lloss_gau_mask', default=1, type=int)
+    parser.add_argument('--lloss_beta', default=1, type=float)
+
+    # scheduled sampling
+    parser.add_argument('--scheduled_sampling_start', type=int, default=-1,
+                        help='at what iteration to start decaying the gt probability')
+    parser.add_argument('--basic_ss_prob', type=float, default=0, help='initial ss prob')
+    parser.add_argument('--scheduled_sampling_increase_every', type=int, default=2,
+                        help='every how many iterations thereafter to increase the sampling probability')
+    parser.add_argument('--scheduled_sampling_increase_prob', type=float, default=0.05,
+                        help='how much to increase the prob at each step')
+    parser.add_argument('--scheduled_sampling_max_prob', type=float, default=0.25,
+                        help='maximum scheduled sampling prob')
+
+    # reranking
+    parser.add_argument('--ec_alpha', type=float, default=0.3)
+    parser.add_argument('--test', action='store_true', default=False)
+    args = parser.parse_args()
+
+    if args.cfg_path:
+        import_cfg(args.cfg_path, vars(args))
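+    # How config layering behaves (grounded in import_cfg below): the yml named by
+    # --cfg_path may itself declare a base_cfg_path; the base file is loaded first
+    # and then shallow-updated by the child's keys. Hypothetical example:
+    #
+    #   # base.yml            # child.yml
+    #   epoch: 25             base_cfg_path: base.yml
+    #   lr: 1.0e-4            epoch: 30
+    #
+    #   -> effective options: epoch=30 (child wins), lr=1e-4 (inherited)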
+    if args.random_seed:
+        import random
+        seed = int(random.random() * 1000)
+        new_id = args.id + '_seed{}'.format(seed)
+        save_folder = os.path.join(args.save_dir, new_id)
+        while os.path.exists(save_folder):
+            seed = int(random.random() * 1000)
+            new_id = args.id + '_seed{}'.format(seed)
+            save_folder = os.path.join(args.save_dir, new_id)
+        args.id = new_id
+        args.seed = seed
+
+    if args.debug:
+        args.id = 'debug_' + time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
+        args.save_checkpoint_every = 1
+        args.shuffle = 0
+
+    if args.caption_decoder_type == 'none':
+        assert args.caption_loss_coef == 0
+        assert args.set_cost_caption == 0
+
+    print("args.id: {}".format(args.id))
+    return args
+
+def import_cfg(cfg_path, args):
+    with open(cfg_path, 'r') as handle:
+        yml = yaml.load(handle, Loader=yaml.FullLoader)
+        if 'base_cfg_path' in yml:
+            base_cfg_path = yml['base_cfg_path']
+            import_cfg(base_cfg_path, args)
+        args.update(yml)
+
+if __name__ == '__main__':
+    opt = parse_opts()
+    print(opt)
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/CaptioningHead/LSTM.py b/yc2_univl/backup/pdvc/CaptioningHead/LSTM.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b44fae2e15520e0c09c298d233e686c9b45d36e
--- /dev/null
+++ b/yc2_univl/backup/pdvc/CaptioningHead/LSTM.py
@@ -0,0 +1,174 @@
+# This file contains the ShowAttendTell and AllImg models.
+
+# ShowAttendTell is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention
+# https://arxiv.org/abs/1502.03044
+
+# AllImg is a model where the
+# img feature is concatenated with the word embedding at every time step as the input of the lstm
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import pdb
+
+import numpy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import *
+
+class Captioner(nn.Module):
+    def __init__(self, opt):
+        super(Captioner, self).__init__()
+        self.opt = opt
+
+        self.vocab_size = opt.vocab_size
+        self.input_encoding_size = opt.input_encoding_size
+        self.rnn_size = opt.rnn_size
+        self.num_layers = opt.num_layers
+        self.drop_prob_lm = opt.drop_prob
+        self.max_caption_len = opt.max_caption_len
+
+        self.ss_prob = 0.0  # scheduled sampling probability
+        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
+
+        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
+        self.dropout = nn.Dropout(self.drop_prob_lm)
+
+        self.init_weights()
+
+    def init_weights(self):
+        initrange = 0.1
+        self.embed.weight.data.uniform_(-initrange, initrange)
+        self.logit.bias.data.fill_(0)
+        self.logit.weight.data.uniform_(-initrange, initrange)
+
+    def init_hidden(self, batch_size):
+        weight = next(self.parameters()).data
+        return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(),
+                weight.new(self.num_layers, batch_size, self.rnn_size).zero_())  # (h0, c0)
+
+    def build_loss(self, input, target, mask):
+        one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1)
+        max_len = input.shape[1]
+        output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6)
+        return output
+
+    def forward(self, event, clip, clip_mask, seq):
+        batch_size = clip.shape[0]
+
+        state = self.init_hidden(batch_size)
+        outputs = []
+        seq = seq.long()
+
+        for i in range(seq.size(1) - 1):
+            if self.training and i >= 1 and self.ss_prob > 0.0:  # otherwise no need to sample
+                sample_prob = clip.data.new(batch_size).uniform_(0, 1)
+                sample_mask = sample_prob < self.ss_prob
+                if sample_mask.sum() == 0:
+                    it = seq[:, i].clone()
+                else:
+                    sample_ind = sample_mask.nonzero().view(-1)
+                    it = seq[:, i].data.clone()
+                    prob_prev = torch.exp(outputs[-1].data)  # fetch prev distribution: shape Nx(M+1)
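+                    # Scheduled sampling: for the masked positions, replace the
+                    # ground-truth token with a word drawn from the model's own
+                    # previous softmax distribution, so training gradually matches
+                    # inference-time conditions (Bengio et al., 2015).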
+                    it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
+                    it = Variable(it, requires_grad=False)
+            else:
+                it = seq[:, i].clone()
+            # break if all the sequences have ended
+            if i >= 1 and seq[:, i].data.sum() == 0:
+                break
+
+            output, state = self.get_logprobs_state(it, event, clip, clip_mask, state)
+            outputs.append(output)
+
+        return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
+
+
+    def get_logprobs_state(self, it, event, clip, clip_mask, state):
+        xt = self.embed(it)
+        output, state = self.core(xt, event, clip, clip_mask, state)
+        logprobs = F.log_softmax(self.logit(self.dropout(output)), dim=1)
+        return logprobs, state
+
+    def sample(self, event, clip, clip_mask, opt={}):
+
+        sample_max = opt.get('sample_max', 1)
+        beam_size = opt.get('beam_size', 1)
+        temperature = opt.get('temperature', 1.0)
+
+        batch_size = clip.shape[0]
+
+        state = self.init_hidden(batch_size)
+
+        seq = []
+        seqLogprobs = []
+
+        for t in range(self.max_caption_len + 1):
+            if t == 0:  # input <bos>
+                it = clip.data.new(batch_size).long().zero_()
+            elif sample_max:
+                sampleLogprobs, it = torch.max(logprobs.data, 1)
+                it = it.view(-1).long()
+            else:
+                if temperature == 1.0:
+                    prob_prev = torch.exp(logprobs.data)  # fetch prev distribution: shape Nx(M+1)
+                else:
+                    # scale logprobs by temperature
+                    prob_prev = torch.exp(torch.div(logprobs.data, temperature))
+                it = torch.multinomial(prob_prev, 1)
+                sampleLogprobs = logprobs.gather(1, it)  # gather the logprobs at sampled positions
+                it = it.view(-1).long()  # and flatten indices for downstream processing
+
+            logprobs, state = self.get_logprobs_state(it, event, clip, clip_mask, state)
+
+            if t >= 1:
+                # stop when all sequences have finished
+                if t == 1:
+                    unfinished = it > 0
+                else:
+                    unfinished = unfinished & (it > 0)
+                if unfinished.sum() == 0:
+                    break
+                it = it * unfinished.type_as(it)
+                seq.append(it)  # seq[t] is the input at time step t+2
+                seqLogprobs.append(sampleLogprobs.view(-1))
+
+        if len(seq) == 0:
+            return [], []
+        return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
+
+class AllImgCore(nn.Module):
+    def __init__(self, opt):
+        super(AllImgCore, self).__init__()
+        self.input_encoding_size = opt.input_encoding_size
+        self.rnn_size = opt.rnn_size
+        self.num_layers = opt.num_layers
+        self.drop_prob_lm = opt.drop_prob
+        self.att_feat_size = opt.clip_context_dim
+
+        self.opt = opt
+        self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type
+        self.input_dim = self.decide_input_feats_dim()
+        self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim,
+                           self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm)
+        assert self.wordRNN_input_feats_type == 'C'
+
+    def decide_input_feats_dim(self):
+        dim = 0
+        if 'E' in self.wordRNN_input_feats_type:
+            dim += self.opt.event_context_dim
+        if 'C' in self.wordRNN_input_feats_type:
+            dim += self.opt.clip_context_dim
+        return dim
+
+    def forward(self, xt, event, clip, clip_mask, state):
+        # mean-pool the clip features over valid (unmasked) positions
+        input_feats = (clip * clip_mask.unsqueeze(2)).sum(1) / (clip_mask.sum(1, keepdims=True) + 1e-5)
+        output, state = self.rnn(torch.cat([xt, input_feats], 1).unsqueeze(0), state)
+        return output.squeeze(0), state
+
+
+class LightCaptioner(Captioner):
+    def __init__(self, opt):
+        super(LightCaptioner, self).__init__(opt)
+        self.core = AllImgCore(opt)
diff --git a/yc2_univl/backup/pdvc/CaptioningHead/LSTM_DSA.py b/yc2_univl/backup/pdvc/CaptioningHead/LSTM_DSA.py
new file mode 100644
index
0000000000000000000000000000000000000000..918fb0ccf89416929b4cee8c1deadd7c99d586ae --- /dev/null +++ b/yc2_univl/backup/pdvc/CaptioningHead/LSTM_DSA.py @@ -0,0 +1,289 @@ +# This file contains ShowAttendTell and AllImg model + +# ShowAttendTell(Soft attention) is from Show, Attend and Tell: Neural Image Caption Generation with Visual Attention +# https://arxiv.org/abs/1502.03044 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import * + +from pdvc.ops.modules import MSDeformAttnCap + +class Captioner(nn.Module): + def __init__(self, opt): + super(Captioner, self).__init__() + self.opt = opt + + self.vocab_size = opt.vocab_size + self.input_encoding_size = opt.input_encoding_size + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + self.max_caption_len = opt.max_caption_len + + self.ss_prob = 0.0 # Schedule sampling probability + self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size) + + self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1) + self.dropout = nn.Dropout(self.drop_prob_lm) + + self.init_weights() + + def init_weights(self): + initrange = 0.1 + self.embed.weight.data.uniform_(-initrange, initrange) + self.logit.bias.data.fill_(0) + self.logit.weight.data.uniform_(-initrange, initrange) + + def init_hidden(self, batch_size): + weight = next(self.parameters()).data + return (weight.new(self.num_layers, batch_size, self.rnn_size).zero_(), + weight.new(self.num_layers, batch_size, self.rnn_size).zero_()) # (h0, c0) + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + max_len = input.shape[1] + output = - (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output + + def build_prob(self, input, target, mask): + ''' + Calculate the sentence-level predicted prob for each GT sentence of each query + input: [num_sentence, max_length, num_words_voc] + ''' + # breakpoint() + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) # [num_sentence, max_length, num_words_voc] + max_len = input.shape[1] + # output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + output = (one_hot[:, :max_len] * input * mask[:, :max_len, None]).sum(-1).sum(-1) / (mask.sum(1) + 1e-6) + return output + + def forward(self,hs, reference, others, cap_tensor): + seq = cap_tensor + vid_num, query_num, _ = hs.shape + assert vid_num == 1 + + reference_points = reference + input_flatten = others['memory'] + input_spatial_shapes = others['spatial_shapes'] + input_level_start_index = others['level_start_index'] + input_padding_mask = others['mask_flatten'] + if reference_points.shape[-1] == 2: + reference_points = reference_points[:, :, None] \ + * torch.stack([others['valid_ratios']]*2, -1)[:, None] + elif reference_points.shape[-1] == 1: + reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None, :, None] + + query = hs + batch_size = query.shape[1] + state = self.init_hidden(batch_size) + outputs = [] + raw_probs = [] + seq = seq.long() + + n_levels = self.core.n_levels + if n_levels < self.core.opt.num_feature_levels: + input_spatial_shapes = input_spatial_shapes[:n_levels] + input_level_start_index = input_level_start_index[:n_levels] + total_input_len = torch.prod(input_spatial_shapes, 
dim=1).sum()
+            input_flatten = input_flatten[:, :total_input_len]
+            input_padding_mask = input_padding_mask[:, :total_input_len]
+            reference_points = reference_points[:, :, :n_levels]
+
+        for i in range(seq.size(1) - 1):
+            if self.training and i >= 1 and self.ss_prob > 0.0:  # otherwise no need to sample
+                sample_prob = hs.new_zeros(batch_size).uniform_(0, 1)
+                sample_mask = sample_prob < self.ss_prob
+                if sample_mask.sum() == 0:
+                    it = seq[:, i].clone()
+                else:
+                    sample_ind = sample_mask.nonzero().view(-1)
+                    it = seq[:, i].data.clone()
+                    prob_prev = torch.exp(outputs[-1].data)  # fetch prev distribution: shape Nx(M+1)
+                    it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
+                    it = Variable(it, requires_grad=False)
+            else:
+                it = seq[:, i].clone()
+            # break if all the sequences have ended
+            if i >= 1 and seq[:, i].data.sum() == 0:
+                break
+
+            output, state, raw_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask)
+            outputs.append(output)
+            raw_probs.append(raw_prob)
+
+        if self.opt.refine_pseudo_box and self.training:
+            return torch.cat([_.unsqueeze(1) for _ in outputs], 1), torch.cat([_.unsqueeze(1) for _ in raw_probs], 1)
+
+        return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
+
+
+    def get_logprobs_state(self, it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask):
+        xt = self.embed(it)
+        output, state = self.core(xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, mask)
+        # compute the vocabulary logits once, so the log-probs and the raw logits
+        # below share the same dropout mask during training
+        logits = self.logit(self.dropout(output))
+        logprobs = F.log_softmax(logits, dim=1)
+        raw_probs = logits  # pre-softmax logits, shape [max_num_word, vocab_size+1]
+        return logprobs, state, raw_probs
+
+    def sample(self, hs, reference, others, opt={}):
+
+        vid_num, query_num, _ = hs.shape
+        assert vid_num == 1
+        batch_size = vid_num * query_num
+        sample_max = opt.get('sample_max', 1)
+        beam_size = opt.get('beam_size', 1)
+        temperature = opt.get('temperature', 1.0)
+
+        reference_points = reference
+        input_flatten = others['memory']
+        input_spatial_shapes = others['spatial_shapes']
+        input_level_start_index = others['level_start_index']
+        input_padding_mask = others['mask_flatten']
+        if reference_points.shape[-1] == 2:
+            reference_points = reference_points[:, :, None] \
+                               * torch.stack([others['valid_ratios']]*2, -1)[:, None]
+        elif reference_points.shape[-1] == 1:
+            reference_points = reference_points[:, :, None] * others['valid_ratios'][:, None, :, None]
+        query = hs
+
+        n_levels = self.core.n_levels
+        if n_levels < self.core.opt.num_feature_levels:
+            input_spatial_shapes = input_spatial_shapes[:n_levels]
+            input_level_start_index = input_level_start_index[:n_levels]
+            total_input_len = torch.prod(input_spatial_shapes, dim=1).sum()
+            input_flatten = input_flatten[:, :total_input_len]
+            input_padding_mask = input_padding_mask[:, :total_input_len]
+            reference_points = reference_points[:, :, :n_levels]
+
+        state = self.init_hidden(batch_size)
+
+        seq = []
+        seqLogprobs = []
+
+        for t in range(self.max_caption_len + 1):
+            if t == 0:  # input <bos>
+                it = hs.data.new(batch_size).long().zero_()
+            elif sample_max:
+                sampleLogprobs, it = torch.max(logprobs.data, 1)
+                it = it.view(-1).long()
+            else:
+                if temperature == 1.0:
+                    prob_prev = torch.exp(logprobs.data)  # fetch prev distribution: shape Nx(M+1)
+                else:
+                    #
scale logprobs by temperature + prob_prev = torch.exp(torch.div(logprobs.data, temperature)) + it = torch.multinomial(prob_prev, 1) + sampleLogprobs = logprobs.gather(1, it) # gather the logprobs at sampled positions + it = it.view(-1).long() # and flatten indices for downstream processing + + logprobs, state, softmax_prob = self.get_logprobs_state(it, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask) + + if t >= 1: + # stop when all finished + if t == 1: + unfinished = it > 0 + else: + unfinished = unfinished & (it > 0) + if unfinished.sum() == 0: + break + it = it * unfinished.type_as(it) + seq.append(it) #seq[t] the input of t+2 time step + seqLogprobs.append(sampleLogprobs.view(-1)) + + if seq==[] or len(seq)==0: + return [],[] + return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1) + + +class ShowAttendTellCore(nn.Module): + + def __init__(self, opt): + super(ShowAttendTellCore, self).__init__() + self.input_encoding_size = opt.input_encoding_size + + self.rnn_size = opt.rnn_size + self.num_layers = opt.num_layers + self.drop_prob_lm = opt.drop_prob + #self.fc_feat_size = opt.fc_feat_size + self.att_feat_size = int(opt.clip_context_dim / opt.cap_nheads) + self.att_hid_size = opt.att_hid_size + + self.opt = opt + self.wordRNN_input_feats_type = opt.wordRNN_input_feats_type + self.input_dim = opt.hidden_dim * 2 + + self.rnn = nn.LSTM(self.input_encoding_size + self.input_dim , + self.rnn_size, self.num_layers, bias=False, dropout=self.drop_prob_lm) + self.att_drop = nn.Dropout(0.5) + + d_model = opt.hidden_dim + self.n_levels = opt.cap_num_feature_levels + self.n_heads = opt.cap_nheads + self.n_points = opt.cap_dec_n_points + + self.deformable_att = MSDeformAttnCap(d_model, self.n_levels, self.n_heads, self.n_points) + + if self.att_hid_size > 0: + self.ctx2att = nn.Linear(self.att_feat_size, self.att_hid_size) + self.h2att = nn.Linear(self.rnn_size, self.att_hid_size) + self.alpha_net = nn.Linear(self.att_hid_size, 1) + + def get_input_feats(self, event, att_clip): + input_feats = [] + if 'E' in self.wordRNN_input_feats_type: + input_feats.append(event) + if 'C' in self.wordRNN_input_feats_type: + input_feats.append(att_clip) + input_feats = torch.cat(input_feats,1) + return input_feats + + def forward(self,xt, state, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask): + + joint_query = torch.cat((state[0][-1].unsqueeze(0), query), 2) + # (N_, N_q, C) + + N_, Lq_, L_, _ = reference_points.shape + + # (N_ * M_, D_, Lq_, L_* P_) + clip = self.deformable_att(joint_query, reference_points, input_flatten, input_spatial_shapes, + input_level_start_index, input_padding_mask) + clip = clip.reshape(N_, self.n_heads, -1, Lq_, self.n_levels * self.n_points).permute(0, 3, 1, 4, 2) + clip = clip.reshape(N_ * Lq_, self.n_heads, self.n_levels * self.n_points, self.att_feat_size) + att_size = self.n_levels * self.n_points + + att = self.ctx2att(clip) # (batch * att_size) * att_hid_size + att = att.view(-1, self.n_heads, att_size, self.att_hid_size) # batch * att_size * att_hid_size + att_h = self.h2att(state[0][-1]) # batch * att_hid_size + att_h = att_h.unsqueeze(1).unsqueeze(1).expand_as(att) # batch * att_size * att_hid_size + dot = att + att_h # batch * att_size * att_hid_size + dot = torch.tanh(dot) # batch * att_size * att_hid_size + dot = dot.view(-1, self.att_hid_size) # (batch * att_size) * att_hid_size + dot = 
self.alpha_net(dot) # (batch * att_size) * 1 + dot = dot.view(-1, att_size) # batch * att_size + + weight = F.softmax(dot, dim=1) + att_feats_ = clip.reshape(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size + att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size + att_res = att_res.reshape(N_ * Lq_, self.n_heads, self.att_feat_size).flatten(1) + input_feats = torch.cat((att_res.unsqueeze(0), query), 2) + # print(xt.shape, input_feats.shape, query.shape, reference_points.shape) + output, state = self.rnn(torch.cat([xt.unsqueeze(0), input_feats], 2), state) + + return output.squeeze(0), state + + +class LSTMDSACaptioner(Captioner): + def __init__(self, opt): + super(LSTMDSACaptioner, self).__init__(opt) + self.core = ShowAttendTellCore(opt) + diff --git a/yc2_univl/backup/pdvc/CaptioningHead/Puppet.py b/yc2_univl/backup/pdvc/CaptioningHead/Puppet.py new file mode 100644 index 0000000000000000000000000000000000000000..3051b3d3de863fefc196e08740e7d6d05474adfd --- /dev/null +++ b/yc2_univl/backup/pdvc/CaptioningHead/Puppet.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn + + +class PuppetCaptionModel(nn.Module): + def __init__(self, opt): + super(PuppetCaptionModel, self).__init__() + self.vocab_size = opt.vocab_size + self.opt = opt + self.puppet_layer= nn.Linear(1,1) + + def forward(self, event, clip, clip_mask, seq): + N, L = seq.shape + output = torch.zeros((N, L-1, self.vocab_size + 1), device=seq.device) + return output + + def sample(self, event, clip, clip_mask, opt={}): + N, _, C = clip.shape + output = torch.zeros((N, 3), device=clip.device) + prob = torch.zeros((N, 3), device=clip.device) + return output, prob + + def build_loss(self, input, target, mask): + one_hot = torch.nn.functional.one_hot(target, self.opt.vocab_size+1) + output = - (one_hot * input * mask[..., None]).sum(2).sum(1) / (mask.sum(1) + 1e-6) + return output \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__init__.py b/yc2_univl/backup/pdvc/CaptioningHead/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55abd1cc8681971b0e498d5db23771053240029f --- /dev/null +++ b/yc2_univl/backup/pdvc/CaptioningHead/__init__.py @@ -0,0 +1,22 @@ +from .LSTM import LightCaptioner +from .Puppet import PuppetCaptionModel +from .LSTM_DSA import LSTMDSACaptioner + +def build_captioner(opt): + if opt.caption_decoder_type == 'none': + caption_embed = PuppetCaptionModel(opt) + + elif opt.caption_decoder_type == 'light': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LightCaptioner(opt) + + elif opt.caption_decoder_type == 'standard': + opt.event_context_dim = None + opt.clip_context_dim = opt.hidden_dim + caption_embed = LSTMDSACaptioner(opt) + + else: + raise ValueError('caption decoder type is invalid') + return caption_embed + diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96e1403d966894f3897772ec3341693c9e1e2097 Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7505e3befe8da0cfc2e2cf4ad989639a7aad658 Binary files /dev/null and 
b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac28b0fdbaca42bce04d24e8200908e43ca3849d Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82aceccc9d18b389c1de136320f99a9d3948bc21 Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/LSTM_DSA.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..204ecd5a71e01bd0a22222a738ac51abf7b3af9a Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86f06e3b6d2a72ca205a646c86a1e9309be235c6 Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/Puppet.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7506f43c89c0c6345ffd3c53b53cd87d5c394cbc Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e0d650e0f33bbf2aa9248e89a8ac9ec8a76397b Binary files /dev/null and b/yc2_univl/backup/pdvc/CaptioningHead/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/UniVL.py b/yc2_univl/backup/pdvc/UniVL.py new file mode 100644 index 0000000000000000000000000000000000000000..c5a8bcf7f019968d8751bbbab0537295c77ebfdd --- /dev/null +++ b/yc2_univl/backup/pdvc/UniVL.py @@ -0,0 +1,238 @@ + +import os +import random +import numpy as np +from pathlib import Path +from pdvc.modules.modeling import UniVL +from pdvc.modules.tokenization import BertTokenizer +from transformers import AutoTokenizer, BertForPreTraining +import torch +import argparse + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +class UniVL_args(object): + def __init__(self) -> None: + self.do_pretrain = False + self.do_train = False + self.do_eval = True + self.train_csv = 'data/youcookii_singlef_train.csv' + self.val_csv = 'data/youcookii_singlef_val.csv' + self.data_path = 'data/youcookii_caption.pickle' + self.features_path = 'data/youcookii_videos_feature.pickle' + self.num_thread_reader = 1 + self.lr = 0.0001 + self.epochs = 20 + self.batch_size = 256 + self.batch_size_val = 3500 + self.lr_decay = 0.9 + self.n_display = 100 + self.video_dim = 1024 + self.seed = 42 + 
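+        # NOTE: max_words is 48 here, while the commented-out get_args() parser
+        # below defaults to 20 -- presumably raised to leave room for longer captions.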
self.max_words = 48 + self.max_frames = 100 + self.feature_framerate = 1 + self.margin = 0.1 + self.hard_negative_rate = 0.5 + self.negative_weighting = 1 + self.n_pair = 1 + self.output_dir = None + self.bert_model = "bert-base-uncased" + self.visual_model = "visual-base" + self.cross_model = "cross-base" + self.decoder_model = "decoder-base" + self.init_model = None + self.do_lower_case = True + self.warmup_proportion = 0.1 + self.gradient_accumulation_steps = 1 + self.n_gpu = 1 + self.cache_dir = "" + self.fp16 = False + self.fp16_opt_level = 'O1' + self.task_type = "retrieval" + self.datatype = "youcook" + self.world_size = 0 + self.local_rank = 0 + self.coef_lr = 0.1 + self.use_mil = False + self.sampled_use_mil = False + self.text_num_hidden_layers = 12 + self.visual_num_hidden_layers = 6 + self.cross_num_hidden_layers = 2 + self.decoder_num_hidden_layers = 3 + self.train_sim_after_cross = False + self.expand_msrvtt_sentences = False + self.batch_size = int(self.batch_size / self.gradient_accumulation_steps) + + def __repr__(self) -> str: + return str(self.__dict__) + + + + +# def get_args(description='UniVL on Retrieval Task'): +# parser = argparse.ArgumentParser(description=description) +# parser.add_argument("--do_pretrain", action='store_true', help="Whether to run training.") +# parser.add_argument("--do_train", action='store_true', help="Whether to run training.") +# parser.add_argument("--do_eval", action='store_true', default=True, help="Whether to run eval on the dev set.") + +# parser.add_argument('--train_csv', type=str, default='data/youcookii_singlef_train.csv', help='') +# parser.add_argument('--val_csv', type=str, default='data/youcookii_singlef_val.csv', help='') +# parser.add_argument('--data_path', type=str, default='data/youcookii_caption.pickle', help='data pickle file path') +# parser.add_argument('--features_path', type=str, default='data/youcookii_videos_feature.pickle', help='feature path') + +# parser.add_argument('--num_thread_reader', type=int, default=1, help='') +# parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate') +# parser.add_argument('--epochs', type=int, default=20, help='upper epoch limit') +# parser.add_argument('--batch_size', type=int, default=256, help='batch size') +# parser.add_argument('--batch_size_val', type=int, default=3500, help='batch size eval') +# parser.add_argument('--lr_decay', type=float, default=0.9, help='Learning rate exp epoch decay') +# parser.add_argument('--n_display', type=int, default=100, help='Information display frequence') +# parser.add_argument('--video_dim', type=int, default=1024, help='video feature dimension') +# parser.add_argument('--seed', type=int, default=42, help='random seed') +# parser.add_argument('--max_words', type=int, default=20, help='') +# parser.add_argument('--max_frames', type=int, default=100, help='') +# parser.add_argument('--feature_framerate', type=int, default=1, help='') +# parser.add_argument('--margin', type=float, default=0.1, help='margin for loss') +# parser.add_argument('--hard_negative_rate', type=float, default=0.5, help='rate of intra negative sample') +# parser.add_argument('--negative_weighting', type=int, default=1, help='Weight the loss for intra negative') +# parser.add_argument('--n_pair', type=int, default=1, help='Num of pair to output from data loader') + +# parser.add_argument("--output_dir", default=None, type=str, +# help="The output directory where the model predictions and checkpoints will be written.") +# 
parser.add_argument("--bert_model", default="bert-base-uncased", type=str, +# help="Bert pre-trained model") +# parser.add_argument("--visual_model", default="visual-base", type=str, required=False, help="Visual module") +# parser.add_argument("--cross_model", default="cross-base", type=str, required=False, help="Cross module") +# parser.add_argument("--decoder_model", default="decoder-base", type=str, required=False, help="Decoder module") +# parser.add_argument("--init_model", default=None, type=str, required=False, help="Initial model.") +# parser.add_argument("--do_lower_case", action='store_true', help="Set this flag if you are using an uncased model.") +# parser.add_argument("--warmup_proportion", default=0.1, type=float, +# help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% of training.") +# parser.add_argument('--gradient_accumulation_steps', type=int, default=1, +# help="Number of updates steps to accumulate before performing a backward/update pass.") +# parser.add_argument('--n_gpu', type=int, default=1, help="Changed in the execute process.") + +# parser.add_argument("--cache_dir", default="", type=str, +# help="Where do you want to store the pre-trained models downloaded from s3") + +# parser.add_argument('--fp16', action='store_true', +# help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit") +# parser.add_argument('--fp16_opt_level', type=str, default='O1', +# help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." +# "See details at https://nvidia.github.io/apex/amp.html") + +# parser.add_argument("--task_type", default="retrieval", type=str, help="Point the task `retrieval` to finetune.") +# parser.add_argument("--datatype", default="youcook", type=str, help="Point the dataset `youcook` to finetune.") + +# parser.add_argument("--world_size", default=0, type=int, help="distribted training") +# parser.add_argument("--local_rank", default=0, type=int, help="distribted training") +# parser.add_argument('--coef_lr', type=float, default=0.1, help='coefficient for bert branch.') +# parser.add_argument('--use_mil', action='store_true', help="Whether use MIL as Miech et. al. (2020).") +# parser.add_argument('--sampled_use_mil', action='store_true', help="Whether MIL, has a high priority than use_mil.") + +# parser.add_argument('--text_num_hidden_layers', type=int, default=12, help="Layer NO. of text.") +# parser.add_argument('--visual_num_hidden_layers', type=int, default=6, help="Layer NO. of visual.") +# parser.add_argument('--cross_num_hidden_layers', type=int, default=2, help="Layer NO. of cross.") +# parser.add_argument('--decoder_num_hidden_layers', type=int, default=3, help="Layer NO. 
of decoder.") + +# parser.add_argument('--train_sim_after_cross', action='store_true', help="Test retrieval after cross encoder.") +# parser.add_argument('--expand_msrvtt_sentences', action='store_true', help="") + +# args = parser.parse_args() + +# # Check paramenters +# if args.gradient_accumulation_steps < 1: +# raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( +# args.gradient_accumulation_steps)) +# if not args.do_train and not args.do_eval: +# raise ValueError("At least one of `do_train` or `do_eval` must be True.") + +# args.batch_size = int(args.batch_size / args.gradient_accumulation_steps) + +# return args + +def set_seed_logger(args): + # predefining random initial seeds + random.seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) # if you are using multi-GPU. + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + + # world_size = torch.distributed.get_world_size() + # torch.cuda.set_device(args.local_rank) + # args.world_size = world_size + + # if not os.path.exists(args.output_dir): + # os.makedirs(args.output_dir, exist_ok=True) + + return args + +def load_pretrained_UniVL(return_visual_encoder=False): + + args = UniVL_args() + args = set_seed_logger(args) + device, n_gpu = 'cuda', 1 + + init_model = '/cpfs01/user/liuhuabin/PDVC/pdvc/modules/univl.pretrained.bin' + model_state_dict = torch.load(init_model, map_location='cpu') + + # Prepare model + cache_dir = os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + if return_visual_encoder: + return model.bert, model.visual, model.normalize_video + else: + return model.bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + +# if __name__ == '__main__': +# device, n_gpu = 'cuda', 1 +# captions = ['I love you', 'you believe me'] + +# tokenizer_hg = AutoTokenizer.from_pretrained("bert-base-uncased") +# text_encoder_hg = tokenizer_hg(captions, return_tensors='pt', truncation=True, padding=True, max_length=20) +# text_encoder_hg = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_hg.items()} +# attention_mask = text_encoder_hg['attention_mask'] + +# args = UniVL_args() +# args = set_seed_logger(args) +# args.init_model = 'modules/univl.pretrained.bin' +# # tokenizer = build_UniVL_tokenizer() +# # input_ids = [] +# # for sent in captions: +# # sent = tokenizer.tokenize(sent) +# # sent = ['[CLS]'] + sent + ['[SEP]'] +# # input_ids += tokenizer.convert_tokens_to_ids(sent) +# model = load_pretrained_UniVL(args, device, n_gpu, args.local_rank, args.init_model) +# text_embed = model(**text_encoder_hg, output_all_encoded_layers=True)[0][-1] +# breakpoint() + +if __name__ == '__main__': + device, n_gpu = 'cuda', 1 + args = UniVL_args() + args = set_seed_logger(args) + args.init_model = 'modules/univl.pretrained.bin' + # tokenizer = build_UniVL_tokenizer() + # input_ids = [] + # for sent in captions: + # sent = tokenizer.tokenize(sent) + # sent = ['[CLS]'] + sent + ['[SEP]'] + # input_ids += tokenizer.convert_tokens_to_ids(sent) + model_bert, model_visual, video_normalizer = load_pretrained_UniVL(args, device, n_gpu, 
+    model_bert, model_visual, video_normalizer = load_pretrained_UniVL(return_visual_encoder=True)
+    inputs = torch.rand(2, 215, 1024)
+    video_mask = torch.ones(2, 215)
+    inputs = video_normalizer(inputs)
+    visual_embed = model_visual(inputs, video_mask, output_all_encoded_layers=True)[0][-1]
+
+    breakpoint()
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/__init__.py b/yc2_univl/backup/pdvc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..194ecd26a483cef3e67c0e5cd971d4f7784aac67
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-37.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a406cf3565bfcd54eddc5d19fbeae7bffd2d629
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-39.pyc b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d4a911fc83c9364bfc6b98dd5d3d5a4ed14f5e3f
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/__init__.cpython-39.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9cafe04379877ab0c87872ae9835aa9bdf4532a4
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-37.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6961cba44a3fa93be1463250d574c8d91411714f
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/base_encoder.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/criterion.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/criterion.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d08274f898128d993db3370b9307fabf56c98f6
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/criterion.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f48fdb961f47546c71e60e995699a206b62a4f6a
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-37.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d45de6e0f900d019a24e0f339e62874f2038557e
Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/deformable_transformer.cpython-38.pyc differ
diff --git a/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6efd23cdeac69c752a715a184606139f2aded19b
Binary files /dev/null and
b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f80042e195d3ecda40db7fe17e8b2b6b8991a376 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/matcher.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/pdvc.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/pdvc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5958f3c996e09fc92224c0dfbc6f1585d0c2b6c Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/pdvc.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c50c9f41bc67334949478d72b69f998d849c9f37 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2b9fbde23c0b61d1377c3e8a2c9af095131c45d Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/position_encoding.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/util.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e503a7b7440cff82242de19b9d909ba99e5f803 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/util.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/util.cpython-39.pyc b/yc2_univl/backup/pdvc/__pycache__/util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df84303c83b25082e579d99e0bdbc7c05bf182ef Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/util.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a9e333fb5a96578c8f8c3017ccf7d80466fff6f Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff5a503c7efa463a12801a2f62599ed146e5ca93 Binary files /dev/null and b/yc2_univl/backup/pdvc/__pycache__/video_segmentation.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/base_encoder.py b/yc2_univl/backup/pdvc/base_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6cb150a62dbb709589ec5271fe1b11ec16adf8f8 --- /dev/null +++ b/yc2_univl/backup/pdvc/base_encoder.py @@ -0,0 +1,86 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+# Modified from DETR (https://github.com/facebookresearch/detr)
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+
+"""
+Base Encoder to create multi-level conv features and positional embedding.
+"""
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from misc.detr_utils.misc import NestedTensor
+from .position_encoding import PositionEmbeddingSine
+
+
+class BaseEncoder(nn.Module):
+    def __init__(self, num_feature_levels, vf_dim, hidden_dim):
+        super(BaseEncoder, self).__init__()
+        self.pos_embed = PositionEmbeddingSine(hidden_dim//2, normalize=True)
+        self.num_feature_levels = num_feature_levels
+        self.hidden_dim = hidden_dim
+
+        if num_feature_levels > 1:
+            input_proj_list = []
+            in_channels = vf_dim
+            input_proj_list.append(nn.Sequential(
+                nn.Conv1d(in_channels, hidden_dim, kernel_size=1),
+                nn.GroupNorm(32, hidden_dim),
+            ))
+            for _ in range(num_feature_levels - 1):
+                input_proj_list.append(nn.Sequential(
+                    nn.Conv1d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1),
+                    nn.GroupNorm(32, hidden_dim),
+                ))
+                in_channels = hidden_dim
+            self.input_proj = nn.ModuleList(input_proj_list)
+        else:
+            self.input_proj = nn.ModuleList([
+                nn.Sequential(
+                    # Conv1d (not Conv2d): the input is a (N, C, L) temporal feature
+                    # map, so a 2-D conv here would fail at the first forward pass
+                    nn.Conv1d(vf_dim, hidden_dim, kernel_size=1),
+                    nn.GroupNorm(32, hidden_dim),
+                )])
+
+        for proj in self.input_proj:
+            nn.init.xavier_uniform_(proj[0].weight, gain=1)
+            nn.init.constant_(proj[0].bias, 0)
+
+    def forward(self, vf, mask, duration):
+        # vf: (N, L, C), mask: (N, L), duration: (N)
+        vf = vf.transpose(1, 2)  # (N, L, C) --> (N, C, L)
+        vf_nt = NestedTensor(vf, mask, duration)
+        pos0 = self.pos_embed(vf_nt)
+
+        srcs = []
+        masks = []
+        poses = []
+
+        src0, mask0 = vf_nt.decompose()
+        srcs.append(self.input_proj[0](src0))
+        masks.append(mask0)
+        poses.append(pos0)
+        assert mask is not None
+
+        for l in range(1, self.num_feature_levels):
+            if l == 1:
+                src = self.input_proj[l](vf_nt.tensors)
+            else:
+                src = self.input_proj[l](srcs[-1])
+            m = vf_nt.mask
+            mask = F.interpolate(m[None].float(), size=src.shape[-1:]).to(torch.bool)[0]
+            pos_l = self.pos_embed(NestedTensor(src, mask, duration)).to(src.dtype)
+            srcs.append(src)
+            masks.append(mask)
+            poses.append(pos_l)
+        return srcs, masks, poses
+
+def build_base_encoder(args):
+    base_encoder = BaseEncoder(args.num_feature_levels, args.feature_dim, args.hidden_dim)
+    return base_encoder
diff --git a/yc2_univl/backup/pdvc/criterion.py b/yc2_univl/backup/pdvc/criterion.py
new file mode 100644
index 0000000000000000000000000000000000000000..d47eb41a6711be9904ad6c55d502572261ff73c9
--- /dev/null
+++ b/yc2_univl/backup/pdvc/criterion.py
@@ -0,0 +1,726 @@
+# ------------------------------------------------------------------------
+# Modified from DETR (https://github.com/facebookresearch/detr)
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+import copy
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from misc.detr_utils import box_ops
+from misc.detr_utils.misc import (accuracy, get_world_size,
+                                  is_dist_avail_and_initialized)
+
+class SetCriterion(nn.Module):
+    """ This class computes the loss for DETR.
+ The process happens in two steps: + 1) we compute hungarian assignment between ground truth boxes and the outputs of the model + 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) + """ + def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + losses: list of all the losses to be applied. See get_loss for list of available losses. + focal_alpha: alpha in Focal Loss + """ + super().__init__() + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.opt = opt + self.pseudo_box_aug = opt.pseudo_box_aug + self.refine_pseudo_box = opt.refine_pseudo_box + if ('Tasty' in opt.visual_feature_folder[0]) or ('tasty' in opt.visual_feature_folder[0]): + counter_class_rate =[0.0, 0.012703673018503175, 0.04915769124551229, 0.06489919911626622, 0.0740127036730185, 0.07346037006351837, 0.08064070698702017, + 0.07069870201601768, 0.07870753935376967, 0.07097486882076774, 0.06766086716376692, 0.0579950289975145, 0.05247169290251312, 0.03783485225075946, + 0.03534935100800884, 0.03203534935100801, 0.026788180060756697, 0.02236951118475559, 0.01988400994200497, 0.016570008285004142, 0.013256006628003313, + 0.00856117094725214, 0.006904170118751726, 0.005523336095001381, 0.004694835680751174, 0.0038663352665009665, 0.0027616680475006906, 0.0027616680475006906, + 0.0016570008285004142, 0.0016570008285004142, 0.0005523336095001381, 0.0008285004142502071, 0.0, 0.00027616680475006904, 0.0, 0.0, 0.00027616680475006904, + 0.0011046672190002762, 0.0, 0.0005523336095001381, 0.0, 0.0, 0.0005523336095001381] + else: + counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01, + 1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02, + 1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03, + 2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04, + 4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04, + 2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05] + self.counter_class_rate = torch.tensor(counter_class_rate) + + def loss_labels(self, outputs, targets, indices, num_boxes, log=True): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + indices, many2one_indices = indices + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1], + dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device) + target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1) + + target_classes_onehot = target_classes_onehot[:,:,:-1] + loss_ce = sigmoid_focal_loss(src_logits, 
target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1] + losses = {'loss_ce': loss_ce} + pred_count = outputs['pred_count'] + max_length = pred_count.shape[1] - 1 + counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets] + counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long) + counter_target_onehot = torch.zeros_like(pred_count) + counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1) + weight = self.counter_class_rate[:max_length + 1].to(src_logits.device) + + counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight) + losses['loss_counter'] = counter_loss + + return losses + + @torch.no_grad() + def loss_cardinality(self, outputs, targets, indices, num_boxes): + """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes + This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients + """ + pred_logits = outputs['pred_logits'] + device = pred_logits.device + tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) + # Count the number of predictions that are NOT "no-object" (which is the last class) + card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) + card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) + losses = {'cardinality_error': card_err} + return losses + + def loss_boxes(self, outputs, targets, indices, num_boxes): + """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss + targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 2] + The target boxes are expected in format (center, length), normalized by the image size. 
+ """ + indices, many2one_indices = indices + N = len(indices[-1][0]) + assert 'pred_boxes' in outputs + idx, idx2 = self._get_src_permutation_idx2(indices) + src_boxes = outputs['pred_boxes'][idx] + if self.opt.use_pseudo_box and self.training: + # print('use pseudo box') + target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0) + else: + # print('use gt box') + target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0) + loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') + + losses = {} + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou( + box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(target_boxes))) + losses['loss_giou'] = loss_giou.sum() / num_boxes + # print(src_boxes) + self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes), + box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1) + sizes = [len(v[0]) for v in indices] + if sizes == [1]: + losses['loss_self_iou'] = self_iou + return losses + self_iou_split = 0 + for i, c in enumerate(self_iou.split(sizes, -1)): + cc = c.split(sizes, -2)[i] + self_iou_split += cc.sum() / (0.5 * (sizes[i]) * (sizes[i]-1)) + has_nan = False if torch.all(~torch.isnan(self_iou_split)) else True + has_inf = False if torch.all(torch.isfinite(self_iou_split)) else True + if has_nan or has_inf: + breakpoint() + losses['loss_self_iou'] = self_iou_split + + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_src_permutation_idx2(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + src_idx2 = torch.cat([src for (_, src) in indices]) + return (batch_idx, src_idx), src_idx2 + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + + + def get_jittered_box(self, box, box_jitter, box_aug_num=5, mode='random'): + # breakpoint() + box = box.unsqueeze(0) # (1,2) + if mode == 'random': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) + iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box)) + keep_idx = torch.where(iou.reshape(-1) > 0.1)[0] + min_keep_cnt = (box_aug_num-1) if (box_aug_num-1) < keep_idx.numel() else keep_idx.numel() + box_repeat = box.repeat(box_aug_num, 1) + box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]] + elif mode == 'random_new': + scale_c = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale_d = torch.empty((1000, 1), dtype=box.dtype, device=box.device).uniform_(1-box_jitter, 1+box_jitter) + scale = torch.cat([scale_c, scale_d], dim=1) + scale_box = box * scale + scale_box = scale_box.clamp(min=0., max=1.) 
+            iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box))
+            keep_idx = torch.where(iou.reshape(-1) > 0.1)[0]
+            min_keep_cnt = min(box_aug_num - 1, keep_idx.numel())
+            box_repeat = box.repeat(box_aug_num, 1)
+            box_repeat[:min_keep_cnt] = scale_box[keep_idx[:min_keep_cnt]]
+        elif mode == 'uniform':
+            ratio_c = box_jitter
+            ratio_d = 0.048 / 2
+            scale_c = torch.tensor([-ratio_c, -ratio_c/2, -ratio_c/4, ratio_c/4, ratio_c/2, ratio_c])
+            scale_d = torch.tensor([-ratio_d, -ratio_d/2, ratio_d/2, ratio_d])
+            scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device)
+            scale_box = box + scale
+            scale_box = scale_box.clamp(min=0., max=1.)
+            iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box))
+            keep_idx = torch.where(iou.reshape(-1) > 0.1)[0]
+            unkeep_idx = torch.where(iou.reshape(-1) <= 0.1)[0]
+            if keep_idx.numel() < (box_aug_num - 1):
+                box_repeat = box.repeat(box_aug_num, 1)
+                box_repeat[:keep_idx.numel()] = scale_box[keep_idx]
+                random_indices = torch.randperm(unkeep_idx.size(0))[:(box_aug_num - 1 - keep_idx.numel())]
+                box_repeat[keep_idx.numel():(box_aug_num - 1)] = scale_box[unkeep_idx[random_indices]]
+            else:
+                box_repeat = box.repeat(box_aug_num, 1)
+                random_indices = torch.randperm(keep_idx.numel())[:(box_aug_num - 1)]
+                box_repeat[:box_aug_num - 1] = scale_box[keep_idx[random_indices]]
+        elif mode == 'uniform_old':
+            # Augment with a pre-defined grid of scales
+            ratio_c = box_jitter
+            ratio_d = box_jitter
+            scale_c = torch.linspace(1 - ratio_c, 1 + ratio_c, 4)
+            scale_d = torch.linspace(1 - ratio_d, 1 + ratio_d, 2)
+            scale = torch.cartesian_prod(scale_c, scale_d).to(device=box.device)  # 4 x 2 = 8 augmented boxes in total
+            scale_box = box * scale
+            scale_box = scale_box.clamp(min=0., max=1.)
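+            # Note: in this branch the IoU computed below is not used for filtering (the
+            # keep_idx line is commented out); box_aug_num - 1 of the 8 grid-scaled boxes
+            # are drawn at random without replacement, and the last slot keeps the original.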
+            iou, _ = box_ops.box_iou(box_ops.box_cl_to_xy(scale_box), box_ops.box_cl_to_xy(box))
+            # keep_idx = torch.where(iou.reshape(-1) > 0.1)[0]
+            box_repeat = box.repeat(box_aug_num, 1)
+            random_indices = torch.randperm(scale_box.size(0))[:(box_aug_num - 1)]
+            box_repeat[:(box_aug_num - 1)] = scale_box[random_indices]
+        elif mode == 'random_range':
+            def batch_randomize_boxes(boxes, max_vary_range, num_samples=1):
+                # Get the centers and widths from the input boxes
+                centers = boxes[:, 0]
+                widths = boxes[:, 1]
+                # Generate random offsets for the left and right boundaries of each box
+                left_boundaries = centers - (widths / 2) - torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range)
+                right_boundaries = centers + (widths / 2) + torch.empty(centers.size(0), num_samples, device=boxes.device).uniform_(0, max_vary_range)
+
+                # Ensure that the boundaries stay within the [0, 1] range
+                left_boundaries = left_boundaries.clamp(0, 1)
+                right_boundaries = right_boundaries.clamp(0, 1)
+
+                # Calculate the new centers and widths
+                new_centers = (left_boundaries + right_boundaries) / 2
+                new_widths = right_boundaries - left_boundaries
+
+                # Revert to the original box wherever the new width is non-positive
+                is_negative = new_widths <= 0
+                new_widths = torch.where(is_negative, widths, new_widths)
+                new_centers = torch.where(is_negative, centers, new_centers)
+
+                # Create and return the new boxes tensor
+                new_boxes = torch.stack((new_centers, new_widths), dim=2)
+                return new_boxes.squeeze(0)
+            box_repeat = batch_randomize_boxes(box, box_jitter, box_aug_num)
+            if torch.isnan(box_repeat).any():
+                raise FloatingPointError('NaN in jittered boxes (random_range mode)')
+        elif mode == 'augment_width':  # the original width covers a 0.5 sigma range
+            import random
+            def augment_boxes_with_scale(boxes, scale, num_augments):
+                augmented_boxes = []
+                for _ in range(num_augments):
+                    center, width = boxes[0]
+                    # Generate a random scale factor with a more uniform distribution
+                    random_scale = scale ** random.uniform(-1, 1)
+                    new_width = width * random_scale
+                    if center + new_width / 2 > 1 or center - new_width / 2 < 0:
+                        new_width = width
+                    augmented_boxes.append([center, new_width])
+                augmented_boxes = torch.tensor(augmented_boxes, device=boxes.device)
+                return augmented_boxes
+            box_repeat = augment_boxes_with_scale(box, box_jitter, box_aug_num)
+        else:
+            raise NotImplementedError('Unsupported box augmentation mode: {}'.format(mode))
+        return box_repeat
+
+    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
+        loss_map = {
+            'labels': self.loss_labels,
+            'cardinality': self.loss_cardinality,
+            'boxes': self.loss_boxes,
+        }
+        assert loss in loss_map, f'do you really want to compute {loss} loss?'
+        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
+
+    def forward(self, outputs, targets, others=None, aug_num=None, aug_ratio=None):
+        """ This performs the loss computation.
+        Parameters:
+             outputs: dict of tensors, see the output specification of the model for the format
+             targets: list of dicts, such that len(targets) == batch_size.
+             The expected keys in each dict depend on the losses applied, see each loss' doc
+        """
+        outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs' and k != 'enc_outputs'}
+        if self.training and self.pseudo_box_aug:
+            targets_cp = copy.deepcopy(targets)
+            assert self.opt.use_pseudo_box
+            for i in range(len(targets_cp)):
+                boxes_aug = []
+                for j in range(len(targets_cp[i]['labels'])):
+                    pseudo_box = targets_cp[i]['boxes_pseudo'][j]
+                    pseudo_box_aug = self.get_jittered_box(pseudo_box, aug_ratio, aug_num, self.opt.pseudo_box_aug_mode)
+                    boxes_aug.append(pseudo_box_aug)
+                targets_cp[i]['boxes_pseudo'] = torch.cat(boxes_aug, dim=0)
+                targets_cp[i]['labels'] = targets_cp[i]['labels'].unsqueeze(dim=1).repeat(1, aug_num).reshape(-1)
+                targets[i]['box_pseudo_aug'] = torch.cat(boxes_aug, dim=0)
+            # Retrieve the matching between the outputs of the last layer and the targets
+            last_indices = self.matcher(outputs_without_aux, targets_cp)
+        else:
+            targets_cp = targets
+            last_indices = self.matcher(outputs_without_aux, targets)
+        outputs['matched_indices'] = last_indices
+
+        num_boxes = sum(len(t["labels"]) for t in targets_cp)
+        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
+        if is_dist_avail_and_initialized():
+            torch.distributed.all_reduce(num_boxes)
+        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
+
+        # Compute all the requested losses
+        losses = {}
+        for loss in self.losses:
+            kwargs = {}
+            losses.update(self.get_loss(loss, outputs, targets_cp, last_indices, num_boxes, **kwargs))
+
+        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
+        if 'aux_outputs' in outputs:
+            aux_indices = []
+            for i, aux_outputs in enumerate(outputs['aux_outputs']):
+                indices = self.matcher(aux_outputs, targets_cp)
+                aux_indices.append(indices)
+                for loss in self.losses:
+                    if loss == 'masks':
+                        # Intermediate masks losses are too costly to compute, we ignore them.
+                        continue
+                    kwargs = {}
+                    if loss == 'labels':
+                        # Logging is enabled only for the last layer
+                        kwargs['log'] = False
+                    l_dict = self.get_loss(loss, aux_outputs, targets_cp, indices, num_boxes, **kwargs)
+                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
+                    losses.update(l_dict)
+
+            return losses, last_indices, aux_indices
+        return losses, last_indices
+
+class AlignCriterion(nn.Module):
+    """ This class computes the loss for DETR.
+    The process happens in two steps:
+        1) we compute the DTW assignment between the ground-truth captions and the output object queries
+        2) we supervise each pair of matched ground-truth / prediction (supervise class)
+    """
+    def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25, focal_gamma=2, opt={}):
+        """ Create the criterion.
+        Parameters:
+            num_classes: number of object categories, omitting the special no-object category
+            matcher: module able to compute a matching between targets and proposals
+            weight_dict: dict containing as key the names of the losses and as values their relative weight.
+            losses: list of all the losses to be applied. See get_loss for list of available losses.
+            focal_alpha: alpha in Focal Loss
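+
+        Note (illustrative): the matcher is expected to return per-video index pairs such as
+        (tensor([2, 5, 7]), tensor([0, 1, 2])), matching object queries 2, 5 and 7 to
+        captions 0, 1 and 2 respectively.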
+        """
+        super().__init__()
+        self.num_classes = num_classes
+        self.matcher = matcher
+        self.weight_dict = weight_dict
+        self.losses = losses
+        self.focal_alpha = focal_alpha
+        self.focal_gamma = focal_gamma
+        self.opt = opt
+        counter_class_rate = [0.00000000e+00, 0.00000000e+00, 1.93425917e-01, 4.12129084e-01,
+                              1.88929963e-01, 7.81296833e-02, 5.09541413e-02, 3.12718553e-02,
+                              1.84833650e-02, 8.39244680e-03, 6.59406534e-03, 4.49595364e-03,
+                              2.19802178e-03, 1.79838146e-03, 5.99460486e-04, 4.99550405e-04,
+                              4.99550405e-04, 1.99820162e-04, 2.99730243e-04, 3.99640324e-04,
+                              2.99730243e-04, 0.00000000e+00, 1.99820162e-04, 0.00000000e+00,
+                              0.00000000e+00, 0.00000000e+00, 9.99100809e-05, 9.99100809e-05]
+        self.counter_class_rate = torch.tensor(counter_class_rate)
+
+    def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
+        """Classification loss (NLL)
+        Compute the classification loss and the counter loss.
+        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
+        """
+        indices, many2one_indices = indices
+        assert 'pred_logits' in outputs
+        src_logits = outputs['pred_logits']
+        idx = self._get_src_permutation_idx(indices)
+        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
+        target_classes = torch.full(src_logits.shape[:2], self.num_classes,
+                                    dtype=torch.int64, device=src_logits.device)
+        target_classes[idx] = target_classes_o
+
+        target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1],
+                                            dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device)
+        target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1)
+
+        target_classes_onehot = target_classes_onehot[:, :, :-1]
+        loss_ce = sigmoid_focal_loss(src_logits, target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=self.focal_gamma) * src_logits.shape[1]
+        losses = {'loss_ce': loss_ce}
+
+        pred_count = outputs['pred_count']
+        max_length = pred_count.shape[1] - 1
+        counter_target = [len(target['boxes']) if len(target['boxes']) < max_length else max_length for target in targets]
+        counter_target = torch.tensor(counter_target, device=src_logits.device, dtype=torch.long)
+        counter_target_onehot = torch.zeros_like(pred_count)
+        counter_target_onehot.scatter_(1, counter_target.unsqueeze(-1), 1)
+        weight = self.counter_class_rate[:max_length + 1].to(src_logits.device)
+        counter_loss = cross_entropy_with_gaussian_mask(pred_count, counter_target_onehot, self.opt, weight)
+        losses['loss_counter'] = counter_loss
+
+        return losses
+
+    def loss_boxes(self, outputs, targets, indices, num_boxes):
+        # With pseudo boxes, regress the predicted boundaries towards them; otherwise apply an
+        # ordering loss and a temporal self-IoU loss that encourage the N predicted boundaries
+        # to be diverse and non-overlapping.
+        # outputs: (bsz, num_query, 2)
+        indices, many2one_indices = indices
+        idx, idx2 = self._get_src_permutation_idx2(indices)
+        src_boxes = outputs['pred_boxes'][idx]  # (num_boxes, 2)
+        avg_duration = torch.mean(src_boxes[:, 1])
+        center_point = src_boxes[:, 0]
+        N = len(indices[-1][0])
+
+        losses = {}
+
+        if self.opt.use_pseudo_box and self.training:
+            # Pseudo ground-truth boxes generated from alignment serve as the target boxes
+            target_boxes = torch.cat([t['boxes_pseudo'][i] for t, (_, i) in zip(targets, indices)], dim=0)
+            loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')
+            losses['loss_bbox'] = loss_bbox.sum() / num_boxes
+
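+            # gIoU for 1-D segments is IoU minus the fraction of the smallest enclosing
+            # interval left uncovered, e.g. (center, length) boxes (0.2, 0.2) and (0.8, 0.2)
+            # span [0.1, 0.3] and [0.7, 0.9], giving IoU 0 and gIoU 0 - (0.8 - 0.4)/0.8 = -0.5.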
+            loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(
+                box_ops.box_cl_to_xy(src_boxes),
+                box_ops.box_cl_to_xy(target_boxes)))
+            losses['loss_giou'] = loss_giou.sum() / num_boxes
+
+        if not self.opt.use_pseudo_box:
+            ## Sequence ordering loss
+            rank_margin = 0.01
+            pairs = torch.combinations(torch.arange(center_point.size(0)), 2)
+            rank_dist = center_point[pairs[:, 0]] - center_point[pairs[:, 1]]
+            # Make sure that the center points are ordered (hinge loss with a margin)
+            rank_loss = torch.relu(rank_margin + rank_dist).mean()
+
+            losses['loss_ref_rank'] = rank_loss
+
+            ## Self-IoU loss
+            prior_duration = 0.06
+            self_iou = torch.triu(box_ops.box_iou(box_ops.box_cl_to_xy(src_boxes),
+                                                  box_ops.box_cl_to_xy(src_boxes))[0], diagonal=1)
+            sizes = [len(v[0]) for v in indices]
+            self_iou_split = 0
+            for i, c in enumerate(self_iou.split(sizes, -1)):
+                cc = c.split(sizes, -2)[i]
+                self_iou_split += cc.sum() / (0.5 * sizes[i] * (sizes[i] - 1))
+            duration_constraint = torch.abs(prior_duration / (avg_duration + 1e-6) - 1)
+            self_iou_split += duration_constraint
+
+            losses['loss_self_iou'] = self_iou_split
+
+        return losses
+
+    @torch.no_grad()
+    def loss_cardinality(self, outputs, targets, indices, num_boxes):
+        """ Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes.
+        This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
+        """
+        pred_logits = outputs['pred_logits']
+        device = pred_logits.device
+        tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device)
+        # Count the number of predictions that are NOT "no-object" (which is the last class)
+        card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1)
+        card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
+        losses = {'cardinality_error': card_err}
+        return losses
+
+    def _get_src_permutation_idx(self, indices):
+        # permute predictions following indices
+        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
+        src_idx = torch.cat([src for (src, _) in indices])
+        return batch_idx, src_idx
+
+    def _get_src_permutation_idx2(self, indices):
+        # permute predictions following indices
+        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
+        src_idx = torch.cat([src for (src, _) in indices])
+        src_idx2 = torch.cat([src for (_, src) in indices])
+        return (batch_idx, src_idx), src_idx2
+
+    def _get_tgt_permutation_idx(self, indices):
+        # permute targets following indices
+        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
+        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
+        return batch_idx, tgt_idx
+
+    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
+        loss_map = {
+            'labels': self.loss_labels,
+            'boxes': self.loss_boxes,
+            'cardinality': self.loss_cardinality,
+        }
+        assert loss in loss_map, f'do you really want to compute {loss} loss?'
+        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
+
+    def forward(self, outputs, targets, others):
+        """ This performs the loss computation.
+        Parameters:
+             outputs: dict of tensors, see the output specification of the model for the format
+             targets: list of dicts, such that len(targets) == batch_size.
+             The expected keys in each dict depend on the losses applied, see each loss' doc
+        """
+        text_embed = others['text_embed']    # num_dec_layers, num_sentence, dim
+        event_embed = others['event_embed']  # num_dec_layers, num_query, dim
+        dim = event_embed.shape[-1]
+
+        # Retrieve the matching between the outputs of the last layer and the targets
+        # if self.opt.matcher_type == 'DTW':
+        #     last_indices = self.matcher(text_embed[-1], event_embed[-1].reshape(-1, dim))
+        # elif self.opt.matcher_type == 'Sim':
+        #     last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim))
+        # else:
+        #     raise NotImplementedError('Align Criterion does not support: {}'.format(self.opt.matcher_type))
+        last_indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim))
+        outputs['matched_indices'] = last_indices
+
+        num_boxes = sum(len(t["labels"]) for t in targets)
+        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
+        if is_dist_avail_and_initialized():
+            torch.distributed.all_reduce(num_boxes)
+        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
+        # Compute all the requested losses
+        losses = {}
+        for loss in self.losses:
+            kwargs = {}
+            losses.update(self.get_loss(loss, outputs, targets, last_indices, num_boxes, **kwargs))
+
+        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
+        if 'aux_outputs' in outputs:
+            aux_indices = []
+            for i, aux_outputs in enumerate(outputs['aux_outputs']):
+                indices = self.matcher(outputs, targets, text_embed[-1], event_embed[-1].reshape(-1, dim))
+                aux_indices.append(indices)
+                for loss in self.losses:
+                    kwargs = {}
+                    if loss == 'labels':
+                        # Logging is enabled only for the last layer
+                        kwargs['log'] = False
+                    l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
+                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
+                    losses.update(l_dict)
+
+            return losses, last_indices, aux_indices
+        return losses, last_indices
+
+class ContrastiveCriterion(nn.Module):
+    '''
+    Contrastive loss between event features and caption features
+    '''
+
+    def __init__(self, temperature=0.1, enable_cross_video_cl=False, enable_e2t_cl=False, enable_bg_for_cl=False):
+        super().__init__()
+        self.temperature = temperature
+        self.enable_cross_video_cl = enable_cross_video_cl
+        self.enable_e2t_cl = enable_e2t_cl
+        self.enable_bg_for_cl = enable_bg_for_cl
+
+    def forward_logits(self, text_embed, event_embed, bg_embed=None):
+        normalized_text_emb = F.normalize(text_embed, p=2, dim=1)
+        normalized_event_emb = F.normalize(event_embed, p=2, dim=1)
+        logits = torch.mm(normalized_text_emb, normalized_event_emb.t())
+        if bg_embed is not None:
+            bg_logits = torch.sum(normalized_event_emb * F.normalize(bg_embed, p=2), dim=1)
+            logits = torch.cat((logits, bg_logits.unsqueeze(0)), dim=0)
+        return logits
+
+    def forward(self, text_embed, event_embed, matching_indices, return_logits=False, bg_embed=None):
+        '''
+        :param text_embed: [(event_num, contrastive_hidden_size)], len = batch size;
+            total_event_number = sum of the event numbers over the batch
+        :param event_embed: (bsz, max_event_num, contrastive_hidden_size), which needs to be
+            flattened in this function
+        :param matching_indices: (bsz, event_num)
+        '''
+        batch_size, max_event_num, _ = event_embed.shape
+        event_embed, text_embed, gt_labels, gt_event_num = self._preprocess(event_embed, [text_embed], matching_indices)
+        raw_logits = self.forward_logits(text_embed, event_embed)
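+        # raw_logits is a (num_captions, num_events) matrix of cosine similarities between
+        # L2-normalized caption and event embeddings; the temperature below sharpens the
+        # softmax used in the InfoNCE-style cross-entropy losses.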
+        logits = raw_logits / self.temperature
+
+        if self.enable_cross_video_cl:
+            t2e_loss = F.cross_entropy(logits, gt_labels)
+            if self.enable_e2t_cl:
+                gt_label_matrix = torch.zeros(len(text_embed) + 1, len(event_embed), device=text_embed.device)
+                gt_label_matrix[torch.arange(len(gt_labels)), gt_labels] = 1
+                event_mask = gt_label_matrix.sum(dim=0) == 0
+                gt_label_matrix[-1, event_mask] = 1
+                e2t_gt_label = gt_label_matrix.max(dim=0)[1]
+                bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1)
+                e2t_logits = torch.cat((logits, bg_logits.unsqueeze(0) / self.temperature), dim=0)
+                if self.enable_bg_for_cl:
+                    e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label)
+                else:
+                    e2t_loss = F.cross_entropy(e2t_logits.t()[~event_mask], e2t_gt_label[~event_mask])
+                loss = 0.5 * (t2e_loss + e2t_loss)
+            else:
+                loss = t2e_loss
+        else:
+            loss = 0
+            base = 0
+            for i in range(batch_size):
+                current_gt_event_num = gt_event_num[i]
+                current_logits = logits[base: base + current_gt_event_num, i * max_event_num: (i + 1) * max_event_num]
+                current_gt_labels = gt_labels[base: base + current_gt_event_num]
+                t2e_loss = F.cross_entropy(current_logits, current_gt_labels)
+                if self.enable_e2t_cl:
+                    gt_label_matrix = torch.zeros(gt_event_num[i] + 1, max_event_num, device=text_embed.device)
+                    gt_label_matrix[torch.arange(current_gt_event_num), current_gt_labels] = 1
+                    event_mask = gt_label_matrix.sum(dim=0) == 0
+                    e2t_gt_label = gt_label_matrix.max(dim=0)[1]
+                    bg_logits = torch.sum(F.normalize(event_embed, p=2) * F.normalize(bg_embed, p=2), dim=1)
+                    e2t_logits = torch.cat((current_logits, bg_logits.unsqueeze(0) / self.temperature), dim=0)
+                    if self.enable_bg_for_cl:
+                        e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label)
+                    else:
+                        e2t_loss = F.cross_entropy(e2t_logits.t(), e2t_gt_label, ignore_index=len(text_embed), reduction='sum') / (1e-5 + sum(~event_mask))
+                    loss += 0.5 * (t2e_loss + e2t_loss)
+                else:
+                    loss += t2e_loss
+                base += current_gt_event_num
+            loss = loss / batch_size
+        if return_logits:
+            return loss, raw_logits
+        return loss
+
+    def _preprocess(self, event_embed, text_embed, matching_indices):
+        '''
+        Flatten event_embed across the batch and build the ground-truth labels
+
+        :param matching_indices: [(event_num, )] len = bsz
+        '''
+        batch_size, max_event_num, f_dim = event_embed.shape
+        gt_labels = []
+        text_features = []
+        gt_event_num = []
+        event_features = event_embed.view(-1, f_dim)
+        for i in range(batch_size):
+            base = i * max_event_num if self.enable_cross_video_cl else 0
+            feat_ids, cap_ids = matching_indices[i]
+            gt_event_num.append(len(feat_ids))
+            text_features.append(text_embed[i][cap_ids])
+            gt_labels.append(feat_ids + base)
+        text_features = torch.cat(text_features, dim=0)
+        gt_labels = torch.cat(gt_labels, dim=0)
+        gt_labels = gt_labels.to(event_embed.device)
+
+        return event_features, text_features, gt_labels, gt_event_num
+
+def cross_entropy_with_gaussian_mask(inputs, targets, opt, weight):
+    gau_mask = opt.lloss_gau_mask
+    beta = opt.lloss_beta
+
+    N_, max_seq_len = targets.shape
+    gaussian_mu = torch.arange(max_seq_len, device=inputs.device).unsqueeze(0).expand(max_seq_len,
+                                                                                      max_seq_len).float()
+    x = gaussian_mu.transpose(0, 1)
+    gaussian_sigma = 2
+    mask_dict = torch.exp(-(x - gaussian_mu) ** 2 / (2 * gaussian_sigma ** 2))
+    _, ind = targets.max(dim=1)
+    mask = mask_dict[ind]
+
+    loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none", weight=1 - weight)
+    if gau_mask:
+        coef = targets + ((1
- mask) ** beta) * (1 - targets) + else: + coef = targets + (1 - targets) + loss = loss * coef + loss = loss.mean(1) + return loss.mean() + +def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): + """ + Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + alpha: (optional) Weighting factor in range (0,1) to balance + positive vs negative examples. Default = -1 (no weighting). + gamma: Exponent of the modulating factor (1 - p_t) to + balance easy vs hard examples. + Returns: + Loss tensor + """ + + prob = inputs.sigmoid() + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") # with_logits func calculates sigmoid and CE jointly + p_t = prob * targets + (1 - prob) * (1 - targets) + loss = ce_loss * ((1 - p_t) ** gamma) + + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + loss = alpha_t * loss + + return loss.mean(1).sum() / num_boxes + +def regression_loss(inputs, targets, opt, weight): + inputs = F.relu(inputs) + 2 + max_id = torch.argmax(targets, dim=1) + if opt.regression_loss_type == 'l1': + loss = nn.L1Loss()(inputs[:, 0], max_id.float()) + elif opt.regression_loss_type == 'l2': + loss = nn.MSELoss()(inputs[:, 0], max_id.float()) + return loss \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/deformable_transformer.py b/yc2_univl/backup/pdvc/deformable_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9b742061b166e0badc41db80f5423b0e46a746 --- /dev/null +++ b/yc2_univl/backup/pdvc/deformable_transformer.py @@ -0,0 +1,496 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + +import copy +import math + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn.init import xavier_uniform_, constant_, normal_ + +from misc.detr_utils.misc import inverse_sigmoid +from pdvc.ops.modules import MSDeformAttn + + +class DeformableTransformer(nn.Module): + def __init__(self, d_model=256, nhead=8, + num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=1024, dropout=0.1, + activation="relu", return_intermediate_dec=False, + num_feature_levels=4, dec_n_points=4, enc_n_points=4, use_anchor=False): + super().__init__() + + self.d_model = d_model + self.nhead = nhead + self.use_anchor = use_anchor + + self.no_encoder = (num_encoder_layers == 0) + self.num_feature_levels = num_feature_levels + + encoder_layer = DeformableTransformerEncoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, enc_n_points) + self.encoder = DeformableTransformerEncoder(encoder_layer, num_encoder_layers) + + decoder_layer = DeformableTransformerDecoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, dec_n_points) + self.decoder = DeformableTransformerDecoder(decoder_layer, num_decoder_layers, return_intermediate_dec, d_model, use_anchor) + + self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model)) + + self.pos_trans = nn.Linear(d_model, d_model * 2) + self.pos_trans_norm = nn.LayerNorm(d_model * 2) + self.reference_points = nn.Linear(d_model, 1) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + for m in self.modules(): + if isinstance(m, MSDeformAttn): + m._reset_parameters() + # if not self.use_anchor: + xavier_uniform_(self.reference_points.weight.data, gain=1.0) + constant_(self.reference_points.bias.data, 0.) 
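+        # level_embed below is drawn from a standard normal distribution; the
+        # reference-point head keeps its Xavier-initialized weight with a zero bias.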
+ normal_(self.level_embed) + + + def get_proposal_pos_embed(self, proposals): + num_pos_feats = 256 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + # N, L, 2 + proposals = proposals.sigmoid() * scale + # N, L, 2, 256 + pos = proposals[:, :, :, None] / dim_t + # N, L, 2, 128, 2 + pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()), dim=4).flatten(2) + return pos + + def get_proposal_pos_embed_1d(self, proposals): + num_pos_feats = 512 + temperature = 10000 + scale = 2 * math.pi + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + + # N, L + proposals = proposals.sigmoid() * scale + # N, L, 512 + pos = proposals[:, None] / dim_t + + pos = torch.stack((pos[:, 0::2].sin(), pos[:, 1::2].cos()), dim=2).flatten(1) + return pos + + def get_valid_ratio(self, mask): + valid_ratio_L = torch.sum(~mask, 1).float() / mask.shape[1] + return valid_ratio_L + + def prepare_encoder_inputs(self, srcs, masks, pos_embeds): + # prepare input for encoder + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + temporal_shapes = [] + for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)): + """ + lvl: (bs, ) + src: (bs, c, L ) + mask: (bs, L) + pos_embed: (bs, d_m, L) + """ + bs, c, L = src.shape + temporal_shapes.append(L) + src = src.transpose(1, 2) # (bs, L, c) + pos_embed = pos_embed.transpose(1, 2) # #(bs, L, d_m) + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + mask_flatten.append(mask) + src_flatten = torch.cat(src_flatten, 1) # (lvl_num, bs, wh, c) + mask_flatten = torch.cat(mask_flatten, 1) # (lvl_num, bs, wh) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) # (lvl_num, bs, wh, d_m) + temporal_shapes = torch.as_tensor(temporal_shapes, dtype=torch.long, device=src_flatten.device) # (lvl_num, 2) + level_start_index = torch.cat((temporal_shapes.new_zeros((1,)), temporal_shapes.cumsum(0)[ + :-1])) # prod: [w0h0, w0h0+w1h1, w0h0+w1h1+w2h2, ...] 
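+        # e.g. temporal_shapes = [100, 50, 25] yields level_start_index = [0, 100, 150]:
+        # each feature level occupies a contiguous slice of the flattened temporal axis.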
+        valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks],
+                                   1)  # (bs, lvl_num), the valid (unpadded) fraction of each level, all values <= 1
+
+        return src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten
+
+    def forward_encoder(self, src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten,
+                        mask_flatten):
+        # encoder
+        if self.no_encoder:
+            memory = src_flatten
+        else:
+            memory = self.encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten,
+                                  mask_flatten)
+
+        return memory
+
+    def prepare_decoder_input_query(self, memory, query_embed):
+        bs, _, _ = memory.shape
+        query_embed, tgt = torch.chunk(query_embed, 2, dim=1)
+        query_embed = query_embed.unsqueeze(0).expand(bs, -1, -1)
+        tgt = tgt.unsqueeze(0).expand(bs, -1, -1)
+        reference_points = self.reference_points(query_embed).sigmoid()  # (bs, object_query, 1)
+        init_reference_out = reference_points  # (bs, object_query, 1)
+        return init_reference_out, tgt, reference_points, query_embed
+
+    def prepare_init_anchor_and_query(self, anchor_embed, hidden_dim, random_anchor_init=False, prior_anchor_duration_init=False, prior_duration=0.048):
+        num_queries = anchor_embed.weight.shape[0]
+        if random_anchor_init:
+            anchor_embed.weight.data[:, :1] = torch.linspace(0, 1, num_queries).unsqueeze(1)
+            anchor_embed.weight.data[:, :1] = inverse_sigmoid(anchor_embed.weight.data[:, :1])
+            print('Initialize the anchor center points with a uniform distribution')
+            # anchor_embed.weight.data[:, :1].requires_grad = False  # DAB-DETR freezes the anchor centers
+            anchor_embed.weight.data[:, :1].requires_grad = True  # here they are kept trainable
+        if prior_anchor_duration_init:
+            # TODO: add prior anchor duration initialization; the implementation below is not correct
+            torch.nn.init.constant_(anchor_embed.weight.data[:, 1:], prior_duration)
+            anchor_embed.weight.data[:, 1:] = inverse_sigmoid(anchor_embed.weight.data[:, 1:])
+            anchor_embed.weight.data[:, 1:].requires_grad = True
+            print('Initialize the anchor durations with: {}'.format(prior_duration))
+        reference_points = anchor_embed.weight.data.detach().clone().sigmoid().unsqueeze(0).expand(1, -1, -1)
+        topk_coords_unact = inverse_sigmoid(reference_points[0, :, 0])
+        query_embed = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed_1d(topk_coords_unact)))  # the position embedding is built from unsigmoided (logit-space) coordinates
+        return query_embed
+
+    def prepare_decoder_input_anchor(self, memory, query_anchor):
+        bs, _, _ = memory.shape
+        query_embed, anchor = query_anchor
+        position_embedding, tgt = torch.chunk(query_embed, 2, dim=1)
+        position_embedding = position_embedding.unsqueeze(0).expand(bs, -1, -1)
+        tgt = tgt.unsqueeze(0).expand(bs, -1, -1)
+        reference_points = anchor.sigmoid().unsqueeze(0).expand(bs, -1, -1)  # (bs, num_queries, 2)
+        init_reference_out = reference_points
+        return init_reference_out, tgt, reference_points, position_embedding
+
+    def prepare_decoder_input_prior(self, proposals, num_queries=100):
+        '''
+        :param proposals: (batch, num_sentence, 2)
+        '''
+        bs, _, _ = proposals.shape
+        # Uniformly generate normalized
coordinates according to number of sentences + reference_points_list = [] + for i in range(bs): + # Generate N-1 points from 0~1 for each sentence uniformly + ns = proposals[i].shape[0] # number of sentences + reference_points_c = torch.linspace(0,1, 2*ns+1, dtype=torch.float32, device=proposals.device) + reference_points_c = reference_points_c[1:-1:2] # (num_sentence,) + reference_points_d = torch.Tensor([1.0/ns]).to(proposals.device).repeat(ns) # (num_sentence,) + reference_points = torch.stack([reference_points_c, reference_points_d], -1) # (num_sentence, 2) + # Padding the reference point to the same length + + num_query_per_sentence = num_queries // ns + reference_points = reference_points.repeat(1, num_query_per_sentence).reshape(-1,2) # (num_queries, 2) + if num_queries % ns != 0: # Padding with zeros + num_padding = num_queries - num_query_per_sentence * ns + padding = torch.Tensor([[1.0, 1.0/ns]]).to(proposals.device).repeat(num_padding, 1) + reference_points = torch.cat([reference_points, padding], 0) + reference_points_list.append(reference_points) + reference_points = torch.stack(reference_points_list, 0) # (batch, num_queries, 2) + init_reference_out = reference_points[:,:,:1] + topk_coords_unact = inverse_sigmoid(reference_points) + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) + return init_reference_out, tgt, reference_points[:,:,:1], query_embed + + def prepare_decoder_input_proposal(self, gt_reference_points): + ''' + :param gt_reference_points: (batch, num_sentence, 2) + ''' + #breakpoint() + topk_coords_unact = inverse_sigmoid(gt_reference_points) + reference_points = gt_reference_points + init_reference_out = reference_points + pos_trans_out = self.pos_trans_norm(self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact))) # (bs, num_sentence, 2*hidden_dim) + query_embed, tgt = torch.chunk(pos_trans_out, 2, dim=2) # Split to query_embed and position_embed (bs, num_sentence, hidden_dim, 2) + return init_reference_out, tgt, reference_points, query_embed + + def forward_decoder(self, *kargs): + hs, inter_references_out = self.decoder(*kargs) + return hs, inter_references_out + + +class DeformableTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, temporal_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, temporal_shapes, level_start_index, + padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + 
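+# Shape sketch for one encoder-layer pass (illustrative; bs = batch size, L = total
+# flattened temporal length across levels, d_model = 256 by default):
+#   src               (bs, L, d_model)      flattened multi-scale features
+#   pos               (bs, L, d_model)      positional + level embeddings
+#   reference_points  (bs, L, n_levels, 1)  normalized 1-D sampling centers
+#   output            (bs, L, d_model)      same shape as src
+# Each sublayer is post-norm: MSDeformAttn -> dropout -> residual -> LayerNorm, then
+# Linear(d_model, d_ffn) -> activation -> dropout -> Linear(d_ffn, d_model) -> residual -> LayerNorm.
+# The DeformableTransformerEncoder below simply stacks num_layers of these layers.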
+class DeformableTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(temporal_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (L_) in enumerate(temporal_shapes): + ref = torch.linspace(0.5, L_ - 0.5, L_, dtype=torch.float32, device=device) + ref = ref.reshape(-1)[None] / (valid_ratios[:, None, lvl] * L_) + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * valid_ratios[:, None] + reference_points = reference_points[:,:,:,None] + return reference_points + + def forward(self, src, temporal_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(temporal_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, temporal_shapes, level_start_index, padding_mask) + + return output + + +class DeformableTransformerDecoderLayer(nn.Module): + def __init__(self, d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # cross attention + self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # self attention + self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout) + self.dropout2 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout3 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout4 = nn.Dropout(dropout) + self.norm3 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, tgt): + tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout4(tgt2) + tgt = self.norm3(tgt) + return tgt + + def forward(self, tgt, query_pos, reference_points, src, src_temporal_shapes, level_start_index, + src_padding_mask=None, query_mask=None): + # self attention + q = k = self.with_pos_embed(tgt, query_pos) + tgt2 = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), tgt.transpose(0, 1), key_padding_mask=~query_mask)[ + 0].transpose(0, 1) + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + + # cross attention + tgt2 = self.cross_attn(self.with_pos_embed(tgt, query_pos), + reference_points, + src, src_temporal_shapes, level_start_index, src_padding_mask) + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + + # ffn + tgt = self.forward_ffn(tgt) + return tgt + + +class DeformableTransformerDecoder(nn.Module): + def __init__(self, decoder_layer, num_layers, return_intermediate=False, d_model=256, use_anchor=False): + super().__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.return_intermediate = return_intermediate + # hack implementation for iterative bounding box refinement and two-stage Deformable DETR + self.bbox_head = None + self.use_anchor = use_anchor + self.d_model = d_model + # if use_anchor: + # self.anchor_head = MLP(d_model, d_model, d_model, 2) + # self.scale_head = MLP(d_model, d_model, d_model, 2) + + + def forward(self, tgt, reference_points, 
src, src_temporal_shapes, src_level_start_index, src_valid_ratios, + query_pos=None, src_padding_mask=None, query_padding_mask=None, disable_iterative_refine=False): + output = tgt + + intermediate = [] + intermediate_reference_points = [] + bs = tgt.shape[0] + for lid, layer in enumerate(self.layers): + if reference_points.shape[-1] == 2: + reference_points_input = reference_points[:, :, None] \ + * torch.stack([src_valid_ratios, src_valid_ratios], -1)[:, None] + else: + assert reference_points.shape[-1] == 1 + reference_points_input = reference_points[:, :, None] * src_valid_ratios[:, None, :, None] + # if self.use_anchor: + # query_sine_embed = gen_sineembed_for_position(reference_points_input[:,:,0,:], self.d_model) + # raw_query_pos = self.anchor_head(query_sine_embed) # num_query, bs, 256 + # query_scale_embed = self.scale_head(output) if lid != 0 else 1 + # query_pos = query_scale_embed * raw_query_pos + output = layer(output, query_pos, reference_points_input, src, src_temporal_shapes, src_level_start_index, + src_padding_mask, query_padding_mask) + + if self.use_anchor: + assert reference_points.shape[-1] == 2 + + # hack implementation for iterative bounding box refinement + if disable_iterative_refine: + reference_points = reference_points + else: + if (self.bbox_head is not None): + tmp = self.bbox_head[lid](output) + if reference_points.shape[-1] == 2: + new_reference_points = tmp + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + else: + assert reference_points.shape[-1] == 1 + new_reference_points = tmp + new_reference_points[..., :1] = tmp[..., :1] + inverse_sigmoid(reference_points) + new_reference_points = new_reference_points.sigmoid() + reference_points = new_reference_points.detach() + else: + reference_points = reference_points + + if self.return_intermediate: + intermediate.append(output) + intermediate_reference_points.append(reference_points) + # breakpoint() + + if self.return_intermediate: + return torch.stack(intermediate), torch.stack(intermediate_reference_points) + + return output, reference_points + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") + + +def gen_sineembed_for_position(pos_tensor, d_model): + # n_query, bs, _ = pos_tensor.size() + # sineembed_tensor = torch.zeros(n_query, bs, 256) + hidden_dim = d_model // 2 + scale = 2 * math.pi + dim_t = torch.arange(hidden_dim, dtype=torch.float32, device=pos_tensor.device) + dim_t = 10000 ** (2 * (dim_t // 2) / hidden_dim) + x_embed = pos_tensor[:, :, 0] * scale + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + if pos_tensor.size(-1) == 1: + pos = pos_x + elif pos_tensor.size(-1) == 2: + w_embed = pos_tensor[:, :, 1] * scale + pos_w = w_embed[:, :, None] / dim_t + pos_w = torch.stack((pos_w[:, :, 0::2].sin(), pos_w[:, :, 1::2].cos()), dim=3).flatten(2) + + pos = torch.cat((pos_x, pos_w), dim=2) + else: + raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1))) + return pos + +class MLP(nn.Module): + """ Very simple multi-layer perceptron (also called FFN)""" + + def __init__(self, input_dim, hidden_dim, output_dim, 
num_layers): + super().__init__() + self.num_layers = num_layers + h = [hidden_dim] * (num_layers - 1) + self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + +def build_deforamble_transformer(args): + return DeformableTransformer( + d_model=args.hidden_dim, + nhead=args.nheads, + num_encoder_layers=args.enc_layers, + num_decoder_layers=args.dec_layers, + dim_feedforward=args.transformer_ff_dim, + dropout=args.transformer_dropout_prob, + activation="relu", + return_intermediate_dec=True, + num_feature_levels=args.num_feature_levels, + dec_n_points=args.dec_n_points, + enc_n_points=args.enc_n_points, + use_anchor=args.use_anchor) diff --git a/yc2_univl/backup/pdvc/dp/CFSA.py b/yc2_univl/backup/pdvc/dp/CFSA.py new file mode 100644 index 0000000000000000000000000000000000000000..135defd0c1a48435405a27e2cc12532d86b5d79a --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/CFSA.py @@ -0,0 +1,327 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, repeat +from torch.nn import CrossEntropyLoss +import math + +def compute_cls_loss(pred, labels, use_cosface=False): + if use_cosface: + # CosFace Loss + s, m = 30.0, 0.4 + cos_value = torch.diagonal(pred.transpose(0, 1)[labels]) + numerator = s * (cos_value - m) + excl = torch.cat([torch.cat((pred[i, :y], pred[i, y + 1:])).unsqueeze(0) for i, y in enumerate(labels)], dim=0) + denominator = torch.exp(numerator) + torch.sum(torch.exp(s * excl), dim=1) + L = numerator - torch.log(denominator) + loss = -torch.mean(L) + else: + # Softmax Loss + criterion = CrossEntropyLoss().cuda() + loss = criterion(pred, labels) + + return loss + + +def frame_blank_align_loss(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + blank2 = seq_features2[:, :1] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + K = 2 * step_num + 1 + sparse_seq_features2 = torch.cat((blank2, seq_features2[:, [5, 7, 8, 9, 11, 12, 13, 14], :]), dim=1) + pred = (torch.einsum('bic,bjc->bij', seq_features1, sparse_seq_features2) / math.sqrt(C)).log_softmax(-1) + + D_pre = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_pre[:, 0] = pred[:, 0, 0] + D_pre[:, 1] = pred[:, 0, 1] + + for t in range(1, T): + D_cur = torch.full((B, K), fill_value=float('-99999999'), device=device) + D_cur[:, 0] = D_pre[:, 0] + pred[:, t, 0] + D_cur[:, 1] = torch.logsumexp(torch.stack([D_pre[:, 0], D_pre[:, 1]]), dim=0) + pred[:, t, 1] + + # blank term + blank_pre_ind = torch.arange(1, K, 2)[None, :].repeat(B, 1) + blank_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_pre_ind] + + blank_cur_ind = torch.arange(2, K, 2)[None, :].repeat(B, 1) + blank_cur = D_pre[torch.arange(B, device=device).unsqueeze(-1), blank_cur_ind] + + blank_log_prob = torch.logsumexp(torch.stack([blank_pre, blank_cur]), dim=0) + D_cur[:, 2:][:, ::2] = blank_log_prob + pred[:, t, 0][:, None].repeat(1, blank_log_prob.shape[-1]) + + # step term + step_prepre_ind = torch.arange(1, K, 2)[None, :-1].repeat(B, 1) + step_prepre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_prepre_ind] + + step_pre_ind = torch.arange(2, K, 2)[None, :-1].repeat(B, 1) + step_pre = D_pre[torch.arange(B, device=device).unsqueeze(-1), step_pre_ind] + + step_cur_ind = torch.arange(3, K, 2)[None, :].repeat(B, 1) + step_cur 
= D_pre[torch.arange(B, device=device).unsqueeze(-1), step_cur_ind] + + step_log_prob = torch.logsumexp(torch.stack([step_prepre, step_pre, step_cur]), dim=0) + D_cur[:, 2:][:, 1::2] = step_log_prob + pred[:, t, 2:] + D_pre = D_cur + + fsa_distance = -torch.logsumexp(D_cur[:, -2:], dim=-1) / 13 + loss = fsa_distance.mean(0) + + return loss + + +def consist_step_mining(seq_features1, seq_features2, step_num): + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num - 1 + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k >= 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + repeat_times1 = (segment1[:, 1:] - segment1[:, :-1]).flatten() + repeat_target1 = torch.arange(step_num, device=device).repeat((B, )) + step_index1 = repeat_target1.repeat_interleave(repeat_times1).reshape(B, T) + + repeat_times2 = (segment2[:, 1:] - segment2[:, :-1]).flatten() + repeat_target2 = torch.arange(step_num, device=device).repeat((B, )) + step_index2 = repeat_target2.repeat_interleave(repeat_times2).reshape(B, T) + + div_term = torch.exp(torch.arange(0, C, 2, device=device) * -(math.log(10000.0) / C)) + + pos_emb1 = torch.zeros(B, T, C, device=device) + pos_emb1[:, :, 0::2] = torch.sin(step_index1.unsqueeze(-1) * div_term) + pos_emb1[:, :, 1::2] = torch.cos(step_index1.unsqueeze(-1) * div_term) + + pos_emb2 = torch.zeros(B, T, C, device=device) + pos_emb2[:, :, 0::2] = torch.sin(step_index2.unsqueeze(-1) * div_term) + pos_emb2[:, :, 1::2] = torch.cos(step_index2.unsqueeze(-1) * div_term) + + return pos_emb1, pos_emb2, segment1[:, :-1]+1, segment2[:, :-1]+1 + + + +def consist_step_mining_train(seq_features1, seq_features2, step_num, pair_labels): + # seq_features1 = seq_features1[:, 1:] + # seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / 
torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1) + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = tmp.flatten(3).max(-1).values + D_ind[:, k] = tmp.flatten(3).max(-1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + final_result = D[:, :, T-1, T-1] + + video_seg1 = segment1[:, :-1] + 1 + video_seg2 = segment2[:, :-1] + 1 + + # loss_step = (-(pair_labels * final_result.max(dim=-1).values)).sum() + loss_step = -(pair_labels * final_result.max(dim=-1).values).mean() + + return loss_step, video_seg1, video_seg2 + + + +def consist_step_mining_inference(seq_features1, seq_features2, step_num): + seq_features1 = seq_features1[:, 1:] + seq_features2 = seq_features2[:, 1:] + (B, T, C), device = seq_features1.shape, seq_features1.device + + # pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + pred = torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] \ + - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] \ + - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + top, left, bottom, right = torch.meshgrid(*[torch.arange(T, device=device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((bottom >= top) | (right >= left)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + segment1, segment2 = [torch.full((B, 1), T, dtype=torch.long, device=device)]*2 + k = step_num + i, j, a, b = [torch.full((B, 1), T-1, dtype=torch.long, device=device)]*4 + + while k > 0: + ind = D_ind[range(B), k, i.squeeze(), j.squeeze()][:, None] + a = ind // T + b = ind % T + segment1 = torch.cat([a, segment1], dim=-1) + segment2 = torch.cat([b, segment2], dim=-1) + i, j, k = a, b, k-1 + + return segment1[:, :-1] + 1, segment2[:, :-1] + 1 + + +def step_align_loss(seq_features1, seq_features2): + B, T, C = seq_features1.shape + # the similarity matrix: 16 * 16 + pred = (torch.einsum('bic,bjc->bij', seq_features1, seq_features2) / math.sqrt(C)).softmax(-1) + # pred = 
torch.cosine_similarity(seq_features1.unsqueeze(2), seq_features2.unsqueeze(1), dim=-1) + pred = pred.cumsum(-2).cumsum(-1) + + D = torch.zeros((B, T, T, T), device=seq_features1.device) + D_ind = torch.zeros((B, T, T, T), dtype=torch.long, device=pred.device) + + D[:, 0] = pred / torch.ones_like(pred).cumsum(-2).cumsum(-1) + + area = torch.ones_like(pred).cumsum(-2).cumsum(-1) + area = (area[:, :, :, None, None] - area[:, :, None, None, :] - area.transpose(1,2)[:, None, :, :, None] + area[:, None, None, :, :]) + block_mat = (pred[:, :, :, None, None] - pred[:, :, None, None, :] - pred.transpose(1,2)[:, None, :, :, None] + pred[:, None, None, :, :]) + + i, j, a, b = torch.meshgrid(*[torch.arange(T, device=seq_features1.device)]*4) + area = area.clamp_min(1).sqrt() + + block_mat = block_mat.masked_fill(((a >= i) | (b >= j)).unsqueeze(0), float('-inf')) / area + + for k in range(1, T): + # tmp = ((D[:, k-1, None, None, :, :] * k) + block_mat) / (k+1) + tmp = D[:, k-1, None, None, :, :] + block_mat + D[:, k] = torch.max(tmp.flatten(3), -1).values + D_ind[:, k] = torch.max(tmp.flatten(3), -1).indices + + final_result = D[:, :, T-1, T-1] + return -(final_result.max(dim=-1).values).mean(), final_result.max(dim=-1).indices, D_ind + + +def single_align_loss(seq_features1, seq_features2): + device = seq_features1.device + T, C = seq_features1.shape + pred = (torch.einsum('ic,jc->ij', seq_features1, seq_features2) / math.sqrt(C)).log_softmax(-1) + + ZERO_PAD = torch.zeros((1), device=device) + ONE_PAD = torch.ones((1), device=device) + S = seq_features2.shape[0] + + target = (torch.arange(S, device=device)) + + D_TABLE = ONE_PAD.log() + for t in range(T): + D_VEC_1 = torch.logsumexp(torch.stack([D_TABLE[1:t+1], D_TABLE[:-1][:t]]), 0) + pred[t, target[:t]] + D_VEC_2 = D_TABLE[t:t+1] + pred[t, target[t:t+1]] + D_TABLE = torch.cat([ZERO_PAD.log(), D_VEC_1, D_VEC_2], dim=-1) + # changed by hotel: remove " / s" + ctc_distance = -D_TABLE[S] + return ctc_distance + + +def frame2varstep_loss(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1).mean(-1) + + +def frame2varstep_dist(seq_features1, seq_features2, video_seg): + B, T, C = seq_features1.shape + losses = [] + for batch in range(B): + seq_feature1 = seq_features1[batch] + + cur_seg = video_seg[batch] + cur_seg = cur_seg[:-1] + 1 + sparse_feature2 = seq_features2[batch, cur_seg, :] + frame_loss = single_align_loss(seq_feature1, sparse_feature2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) + + +def frame2learnedstep_dist(frame_feats1, step_feats2): + B, T, C = frame_feats1.shape + losses = [] + for batch in range(B): + frame_feat1 = frame_feats1[batch] + step_feat2 = step_feats2[batch] + # step_feat2 = step_feat2[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]] + frame_loss = single_align_loss(frame_feat1, step_feat2) + losses.append(frame_loss) + + return torch.stack(losses, dim=-1) diff --git a/yc2_univl/backup/pdvc/dp/__init__.py b/yc2_univl/backup/pdvc/dp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc 
b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd28dccf2f11d713b40d4e237cb5a055bf54ca5d Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1197f07fc41ae6f41b581ebd13f30b674234acf4 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24490f9a2f7cc151dc46f67b4d4ae214dba5c47a Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/__init__.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b35d663b4275176bf9f37c5dff954afd66df0e6 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fe93fd162f629560d23a2791ff3dab2c276d70c Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f2ae8f9d246202b485f89aa690174225dc2e66e Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/dp_utils.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7d1ad496851d504c4b5de3cabed3465262cf89 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af2c6ca1bfc47fc34f69aaeee119c1c439fdea4b Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..214dc29706641783b09e447117f540f723ec6868 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/exact_dp.cpython-39.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d84ee83c249b2c327db4180485c62581e0bcb345 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ae95ac7acddab327941068e44fcc974789c6d059 Binary files /dev/null and b/yc2_univl/backup/pdvc/dp/__pycache__/soft_dp.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/dp/dp_utils.py b/yc2_univl/backup/pdvc/dp/dp_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f1dcdb6e6cb0385b1862aff36c779cdda89cf563 --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/dp_utils.py @@ -0,0 +1,402 @@ +import numpy as np +import torch +import math + +from itertools import product +from torch import log, exp +import torch.nn.functional as F + + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def compute_all_costs( + z_features, + x_features, + gamma_xz, + drop_cost_type, + keep_percentile, + l2_normalize=False, + given_baseline_logits=None, + return_baseline=False, +): + """This function computes pairwise match and individual drop costs used in Drop-DTW + + Parameters + __________ + + z_features: torch.tensor of size [K, d] + features of the K steps + x_features: torch.tensor of size [N, d] + features of the N video clips + gamma_xz: float + softmax temperature used when turning similarities into match/drop probabilities + drop_cost_type: str + The type of drop cost definition, e.g., learnable or logits percentile. + keep_percentile: float in [0, 1] + if drop_cost_type == 'logit', defines drop (keep) cost threshold as logits percentile + l2_normalize: bool + whether to normalize clip and step features before computing the costs + """ + + if l2_normalize: + x_features = F.normalize(x_features, p=2, dim=1) + z_features = F.normalize(z_features, p=2, dim=1) + + sim = z_features @ x_features.T + + if drop_cost_type == "logit": + if keep_percentile > 1: + baseline_logit = sim.min().detach() - 1 + else: + k = max([1, int(torch.numel(sim) * keep_percentile)]) + baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach() + baseline_logits = baseline_logit.repeat([1, sim.shape[1]]) # making it of shape [1, N] + sims_ext = torch.cat([sim, baseline_logits], dim=0) + else: + assert False, f"No such drop mode {drop_cost_type}" + + softmax_sims = torch.nn.functional.softmax(sims_ext / gamma_xz, dim=0) + matching_probs, drop_probs = softmax_sims[:-1], softmax_sims[-1] + zx_costs = -torch.log(matching_probs + 1e-5) + drop_costs = -torch.log(drop_probs + 1e-5) + return zx_costs, drop_costs, drop_probs
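+ + +# Shape sketch for compute_all_costs (illustrative comment, not from the original file): +# with K=3 steps and N=10 clips, sim is [3, 10]; a baseline row holding the +# keep_percentile-th largest similarity is appended, and the softmax over dim 0 of the +# resulting [4, 10] matrix yields, per clip, match probabilities for every step plus a +# drop probability: +# z, x = torch.randn(3, 512), torch.randn(10, 512) +# zx_costs, drop_costs, drop_probs = compute_all_costs( +# z, x, gamma_xz=10.0, drop_cost_type="logit", keep_percentile=0.3, l2_normalize=True) +# # zx_costs: [3, 10], drop_costs: [10], drop_probs: [10]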
+ + +def compute_double_costs( + z_features, + x_features, + gamma_xz, + drop_cost_type, + keep_percentile, + l2_normalize=False, + return_baseline=False, +): + """This function computes pairwise match costs and drop costs for both sequences, used in double Drop-DTW + + Parameters + __________ + + z_features: torch.tensor of size [K, d] + features of the K steps + x_features: torch.tensor of size [N, d] + features of the N video clips + gamma_xz: float + softmax temperature used when turning similarities into match/drop probabilities + drop_cost_type: str + The type of drop cost definition, e.g., learnable or logits percentile. + keep_percentile: float in [0, 1] + if drop_cost_type == 'logit', defines drop (keep) cost threshold as logits percentile + l2_normalize: bool + whether to normalize clip and step features before computing the costs + """ + + z_features, frame_features = z_features, x_features + if l2_normalize: + x_features = F.normalize(frame_features, p=2, dim=1) + z_features = F.normalize(z_features, p=2, dim=1) + sim = z_features @ x_features.T + + if drop_cost_type == "logit": + k = max([1, int(torch.numel(sim) * keep_percentile)]) + baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach() + else: + assert False, f"No such drop mode {drop_cost_type}" + sim_ext = F.pad(sim, (0, 1, 0, 1), value=baseline_logit) + + softmax_sims = torch.nn.functional.softmax(sim_ext.reshape(-1) / gamma_xz, dim=0).reshape(sim_ext.shape) + matching_probs, x_drop_probs, z_drop_probs = softmax_sims[:-1, :-1], softmax_sims[-1, :-1], softmax_sims[:-1, -1] + zx_costs = -torch.log(matching_probs + 1e-5) + x_drop_costs = -torch.log(x_drop_probs + 1e-5) + z_drop_costs = -torch.log(z_drop_probs + 1e-5) + return zx_costs, x_drop_costs, z_drop_costs + + +class VarTable: + def __init__(self, dims, dtype=torch.float, device=device): + self.dims = dims + d1, d2, d_rest = dims[0], dims[1], dims[2:] + + self.vars = [] + for i in range(d1): + self.vars.append([]) + for j in range(d2): + var = torch.zeros(d_rest).to(dtype).to(device) + self.vars[i].append(var) + + def __getitem__(self, pos): + i, j = pos + return self.vars[i][j] + + def __setitem__(self, pos, new_val): + i, j = pos + if self.vars[i][j].sum() != 0: + assert False, "This cell has already been assigned. There must be a bug somewhere." + else: + self.vars[i][j] = self.vars[i][j] + new_val + + def show(self): + device, dtype = self[0, 0].device, self[0, 0].dtype + mat = torch.zeros(self.dims, dtype=dtype, device=device) + for dims in product(*[range(d) for d in self.dims]): + i, j, rest = dims[0], dims[1], dims[2:] + mat[dims] = self[i, j][rest] + return mat + + +def minGamma(inputs, gamma=1, keepdim=True): + """continuous relaxation of min defined in the D3TW paper, i.e. minGamma(a) = -gamma * log(sum_i exp(-a_i / gamma))""" + if type(inputs) == list: + if inputs[0].shape[0] == 1: + inputs = torch.cat(inputs) + else: + inputs = torch.stack(inputs, dim=0) + + if gamma == 0: + minG = inputs.min(dim=0, keepdim=keepdim) + else: + # log-sum-exp stabilization trick + zi = -inputs / gamma + max_zi = zi.max() + log_sum_G = max_zi + log(exp(zi - max_zi).sum(dim=0, keepdim=keepdim) + 1e-5) + minG = -gamma * log_sum_G + return minG + + +def minProb(inputs, gamma=1, keepdim=True): + if type(inputs) == list: + if inputs[0].shape[0] == 1: + inputs = torch.cat(inputs) + else: + inputs = torch.stack(inputs, dim=0) + + if gamma == 0: + minP = inputs.min(dim=0, keepdim=keepdim) + else: + probs = F.softmax(-inputs / gamma, dim=0) + minP = (probs * inputs).sum(dim=0, keepdim=keepdim) + return minP + + +def prob_min(values, gamma_min, logits=None): + logits = values if logits is None else logits + assert len(logits) == len(values), "Values and prob logits are of different length" + + if len(values) > 1: + values = torch.cat(values, dim=-1) + logits = torch.cat(logits, dim=-1) + else: + values = values[0] + logits = logits[0] + + if gamma_min > 0: + probs = F.softmax(-logits / gamma_min, dim=-1) + else: + probs = F.one_hot(logits.argmin(), logits.size(-1)) + + if values.dim() > probs.dim(): + probs = probs[..., None, :] + + out = (values * probs).sum(-1).to(values.dtype) + return out + + +def list_min(values, keys=None): + keys
= values if keys is None else keys + assert len(keys) == len(values), "Values and prob logits are of different length" + + if values[0].dim() == keys[0].dim() + 1: + dim = -2 + else: + dim = -1 + + if len(values) > 1: + values = torch.cat(values, dim=dim) + keys = torch.cat(keys, dim=-1) + else: + values = values[0] + keys = keys[0] + + onehot = F.one_hot(keys.argmin(-1), keys.size(-1)) + if values.dim() > keys.dim(): + onehot = onehot[..., None] + out = (values * onehot).sum(dim).to(values.dtype) + return out + + +def traceback(D): + i, j = np.array(D.shape) - 2 + p, q = [i], [j] + while (i > 0) or (j > 0): + tb = np.argmin((D[i, j], D[i, j + 1], D[i + 1, j])) + if tb == 0: + i -= 1 + j -= 1 + elif tb == 1: + i -= 1 + else: # (tb == 2): + j -= 1 + p.insert(0, i) + q.insert(0, j) + return np.array(p), np.array(q) + + +def diag_to_mat(diags, K, N): + mat = np.zeros([K, N]) - 123 + for d in range(len(diags)): + for r, v in enumerate(diags[d]): + j = min(d, N - 1) - r + i = d - j + mat[i, j] = v if v < 1e8 else np.inf + return mat + + +def pad_costs(zx_costs_list, drop_costs_list): + B = len(zx_costs_list) + Ns, Ks = [], [] + for i in range(B): + Ki, Ni = zx_costs_list[i].shape + if Ki >= Ni: + # in case the number of steps is greater than the number of frames, + # duplicate every frame and let the drops do the job. + mult = math.ceil(Ki / Ni) + zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1]) + drop_costs_list[i] = torch.stack([drop_costs_list[i]] * mult, dim=-1).reshape([-1]) + Ni *= mult + Ns.append(Ni) + Ks.append(Ki) + N, K = max(Ns), max(Ks) + + # preparing padded tables + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], [] + for i in range(B): + zx_costs = zx_costs_list[i] + drop_costs = drop_costs_list[i] + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # padding everything to the size of the largest N and K + row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device) + padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad])) + padded_drop_costs.append(torch.cat([drop_costs, row_pad])) + multirow_pad = torch.stack([row_pad + 9999999999] * Ks[i], dim=0) + padded_table = torch.cat([zx_costs, multirow_pad], dim=1) + rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + 9999999999 + padded_table = torch.cat([padded_table, rest_pad], dim=0) + padded_zx_costs.append(padded_table) + return padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks + + +def get_diag_coord_grid(B, d_len, num_states, d_idx): + """ + B - batch size + d - num_elements in the diagonal + num_states - number of states in DP table + d_idx - idx of the diagonal , used for marking + """ + r = torch.arange(d_len) + s = torch.arange(num_states) + d = torch.ones(d_len, num_states) * d_idx + mg = torch.stack([d, *torch.meshgrid(r, s)], dim=-1)[None, ...].repeat([B, 1, 1, 1]) + return mg + + +def diag_traceback(pointer, N, paths): + # getting rid of unnecessary elements in the batch + pointer = [int(l.item()) for l in pointer] + d, r, s = pointer + traceback = [pointer] + while d > 0: + new_pointer = [int(l.item()) for l in paths[d][r, s]] + traceback.append(new_pointer) + d, r, s = new_pointer + + # transform to rectangular coordinates + rectangular_traceback = [] + for d, r, s in traceback: + i = r + max(0, d - N + 1) + j = d - i + if i > 0 and j > 0: + rectangular_traceback.append((i, j, s)) + + return traceback, rectangular_traceback + + +def nw_diag_traceback(d, r, N, paths): + d, r = int(d.item()), int(r.item()) + traceback = [] + while d 
> 0: + d_1, s_1, s = [int(l.item()) for l in paths[d][r, 0]] + traceback.append((d, r, s)) + d, r = d_1, s_1 + + # transform to rectangular coordinates + rectangular_traceback = [] + for d, r, s in traceback: + i = r + max(0, d - N + 1) + j = d - i + if i > 0 and j > 0: + rectangular_traceback.append((i, j, s)) + + return traceback, rectangular_traceback + + +def compute_symmetric_cost(sim, keep_percentile=0.3): + k = max([1, int(torch.numel(sim) * keep_percentile)]) + baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach() + baseline_logits = baseline_logit.repeat([1, sim.shape[1]]) # making it of shape [1, N] + zx_costs = -sim + x_drop_costs = -baseline_logits.squeeze() + z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze() + return zx_costs, x_drop_costs, z_drop_costs + + +#============ Hack from model_utilis.py in StepFormer ============# + + + +def unique_softmax(sim, labels, gamma=1, dim=0): + assert sim.shape[0] == labels.shape[0] + labels = labels.detach().cpu().numpy() + _, unique_index, unique_inverse_index = np.unique(labels, return_index=True, return_inverse=True) + unique_sim = sim[unique_index] + unique_softmax_sim = torch.nn.functional.softmax(unique_sim / gamma, dim=dim) + softmax_sim = unique_softmax_sim[unique_inverse_index] + return softmax_sim + +def compute_masked_sims(z, x, z_pad_mask, x_pad_mask, l2_normalize=False, softmax_dim=None, gamma=None): + # z ~ [B, K, d], x ~ [B, N, d] + if l2_normalize: + z, x = F.normalize(z, dim=-1), F.normalize(x, dim=-1) + pad_sims = torch.einsum("bkd,bnd->bkn", z, x) + masked_sims = [] + for i in range(x.shape[0]): + masked_sim = pad_sims[i] + masked_sim = masked_sim if z_pad_mask is None else masked_sim[~z_pad_mask[i], :] + masked_sim = masked_sim if x_pad_mask is None else masked_sim[:, ~x_pad_mask[i]] + if softmax_dim is not None: + masked_sim = F.softmax(masked_sim / gamma, dim=softmax_dim) + masked_sims.append(masked_sim) + return masked_sims + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + + +def cosine_sim(x, z): + cos_sim_fn = torch.nn.CosineSimilarity(dim=1) + return cos_sim_fn(x[..., None], z.T[None, ...]) + + +def cos_dist(x, z): + cos_sim_fn = torch.nn.CosineSimilarity(dim=1) + return (1 - cos_sim_fn(x[..., None], z.T[None, ...])) / 2 + + +def l2_dist(x, z): + # pairwise distances between the rows of x [N, d] and the rows of z [M, d] + dist_squared = (x**2).sum(-1, keepdim=True) + (z**2).sum(-1)[None, :] - 2 * x @ z.T + return torch.clamp(dist_squared, min=0).sqrt() + + +def cos_loglikelihood(x, z, gamma=0.1, z_dim=1): + cos_sim = cosine_sim(x, z) + probs = F.softmax(cos_sim / gamma, dim=z_dim) + return torch.log(probs) \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/dp/exact_dp.py b/yc2_univl/backup/pdvc/dp/exact_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..ada874b89a60799af867aab82357c8d7b442348d --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/exact_dp.py @@ -0,0 +1,1123 @@ +import torch +import numpy as np +import torch.nn.functional as F +from functools import partial +from copy import copy + +# from dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min +from pdvc.dp.dp_utils import get_diag_coord_grid, diag_traceback, nw_diag_traceback, list_min
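+ + +# Shared conventions for the DP routines in this file (summary comment): z denotes the +# K steps and x the N video clips; zx_costs[k, n] is the cost of matching step k to +# clip n, while the drop costs price leaving a clip (x) or a step (z) unmatched. Where +# a 4-state table appears, the states are {0: zx (match), 1: z- (x dropped), +# 2: -x (z dropped), 3: -- (both dropped)}. The batched *_machine variants sweep the +# DP table along anti-diagonals, updating all cells of one diagonal in parallel.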
+ + +def crosstask_dp(cost_matrix, exactly_one=True, bg_cost=0): + "Algorithm used in Cross-Task to calculate Recall" + + def get_step(k): + return 0 if k % 2 == 0 else int((k + 1) / 2) + + T = cost_matrix.shape[0] + K = cost_matrix.shape[1] + K_ext = int(2 * K + 1) + + L = -np.ones([T + 1, K_ext], dtype=float) + P = -np.ones([T + 1, K_ext], dtype=float) + L[0, 0] = 0 + P[0, 0] = 0 + + for t in range(1, T + 1): + Lt = L[t - 1, :] + Pt = P[t - 1, :] + for k in range(K_ext): + s = get_step(k) + opt_label = -1 + + j = k + if (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1 and (s == 0 or not exactly_one): + opt_label = j + opt_value = Lt[j] + + j = k - 1 + if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1: + opt_label = j + opt_value = L[t - 1][j] + + if s != 0: + j = k - 2 + if j >= 0 and (opt_label == -1 or opt_value > Lt[j]) and Pt[j] != -1: + opt_label = j + opt_value = Lt[j] + + if s != 0: + L[t, k] = opt_value + cost_matrix[t - 1][s - 1] + else: + L[t, k] = opt_value + bg_cost + P[t, k] = opt_label + + labels = np.zeros_like(cost_matrix) + if L[T, K_ext - 1] < L[T, K_ext - 2] or (P[T, K_ext - 2] == -1): + k = K_ext - 1 + else: + k = K_ext - 2 + for t in range(T, 0, -1): + s = get_step(k) + if s > 0: + labels[t - 1, s - 1] = 1 + k = P[t, k].astype(int) + return labels + + +def iou_based_matching(pred_seg, gt_seg, pred_step_ids, gt_step_ids, ignore_class=True): + """Performs the matching of predicted and gt sequence segments""" + pred_segments = torch.stack([pred_seg == idx for idx in pred_step_ids], 0) # [N_pred, T] + gt_segments = torch.stack([gt_seg == idx for idx in gt_step_ids], 0) # [N_gt, T] + intersection = ( + torch.logical_and(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1) + ) # [N_pred, N_gt] + union = torch.logical_or(pred_segments.unsqueeze(1), gt_segments.unsqueeze(0)).to(int).sum(-1) # [N_pred, N_gt] + iou = intersection / (union + 1e-5) # [N_pred, N_gt] + + C = -iou.detach().cpu().numpy().T # [N_gt, N_pred] + if not ignore_class: + print("Not ignoring class") + is_same_step_id = pred_step_ids.unsqueeze(1) == gt_step_ids.unsqueeze(0) # [N_pred, N_gt] + if is_same_step_id.shape == (1, 1): + C[0, 0] += 9999 * (~is_same_step_id[0, 0]) + else: + C[~is_same_step_id] = 9999 + + x_drop, z_drop = np.zeros(C.shape[1]), np.zeros(C.shape[0]) + labels = double_drop_dtw(C, x_drop, z_drop, one_to_many=False, many_to_one=False, return_labels=True) - 1 + indices = (np.arange(len(labels))[labels > -1], labels[labels > -1]) + return [torch.as_tensor(i, dtype=torch.int64) for i in indices] + + +def drop_dtw(zx_costs, drop_costs, exclusive=True, contiguous=True, one_to_one=False, return_labels=False): + """Drop-DTW algorithm that allows drop only from one (video) side. See Algorithm 1 in the paper. + + Parameters + ---------- + zx_costs: np.ndarray [K, N] + pairwise match costs between K steps and N video clips + drop_costs: np.ndarray [N] + drop costs for each clip + exclusive: bool + If True any clip can be matched with only one step, not many. + contiguous: bool + if True, can only match a contiguous sequence of clips to a step + (i.e. no drops in between the clips) + return_labels: bool + if True, returns output directly useful for segmentation computation (made for convenience) + """ + K, N = zx_costs.shape + + # D: the dynamic programming table, which records the intermediate costs + # P: the path tracking table, which records the previous location and state (zi, xi, prev_state) + + # initialize solution matrices + D = np.zeros([K + 1, N + 1, 2]) # the 2 last dimensions correspond to different states. + # State (dim) 0 - x is matched; State 1 - x is dropped + D[1:, 0, :] = np.inf # no drops in z in any state + D[0, 1:, 0] = np.inf # no drops in x in state 0, i.e. state where x is matched + D[0, 1:, 1] = np.cumsum(drop_costs) # drop costs initialization in state 1 + + # initialize path tracking info for each state + P = np.zeros([K + 1, N + 1, 2, 3], dtype=int) # the last dimension records the previous location and state (zi, xi, prev_state) + for xi in range(1, N + 1): + P[0, xi, 1] = 0, xi - 1, 1 + # filling in the dynamic tables + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1] + diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states] + diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states] + + left_neigh_states = [0, 1] + left_neigh_coords = [(zi, xi - 1) for _ in left_neigh_states] + left_neigh_costs = [D[zi, xi - 1, s] for s in left_neigh_states] + + left_pos_neigh_states = [0] if contiguous else left_neigh_states + left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states] + left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states] # Drop between clips is not allowed when setting `contiguous==True` (one step to sparse clips is not allowed) + + top_pos_neigh_states = [0] + top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states] + top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexing in costs is shifted by 1 + + # state 0: matching x to z + neigh_states_pos = diag_neigh_states + neigh_coords_pos = diag_neigh_coords + neigh_costs_pos = diag_neigh_costs + if not one_to_one: + neigh_states_pos = neigh_states_pos + left_pos_neigh_states + neigh_coords_pos = neigh_coords_pos + left_pos_neigh_coords + neigh_costs_pos = neigh_costs_pos + left_pos_neigh_costs + if not exclusive: # exclusive=True indicates any clip can be matched with only one step, that is, path from top is not allowed + neigh_states_pos = neigh_states_pos + top_pos_neigh_states + neigh_coords_pos = neigh_coords_pos + top_pos_neigh_coords + neigh_costs_pos = neigh_costs_pos + top_pos_neigh_costs # keep the costs aligned with the states and coords lists above + + costs_pos = np.array(neigh_costs_pos) + zx_costs[z_cost_ind, x_cost_ind] # calculate cumulative cost in current step + opt_ind_pos = np.argmin(costs_pos) + P[zi, xi, 0] = *neigh_coords_pos[opt_ind_pos], neigh_states_pos[opt_ind_pos] # Records the last step's position (zi,xi) and state (0 or 1) + D[zi, xi, 0] = costs_pos[opt_ind_pos] # Update the minimal cumulative cost of selected path + + # state 1: x is dropped + costs_neg = np.array(left_neigh_costs) + drop_costs[x_cost_ind] + opt_ind_neg = np.argmin(costs_neg) + P[zi, xi, 1] = *left_neigh_coords[opt_ind_neg], left_neigh_states[opt_ind_neg] + D[zi, xi, 1] = costs_neg[opt_ind_neg] + + cur_state = D[K, N, :].argmin() + min_cost = D[K, N, cur_state] + + # backtracking the solution + zi, xi = K, N + path, labels = [], np.zeros(N) + x_dropped = [N] if cur_state == 1 else [] # the last clip is dropped iff the final state is the drop state + while not (zi == 0 and xi == 0): + path.append((zi, xi)) + zi_prev, xi_prev, prev_state = P[zi, xi, cur_state] + if xi > 0: + labels[xi - 1] = zi * (cur_state == 0) # either zi or 0 + if prev_state == 1: + x_dropped.append(xi_prev) + zi, xi, cur_state = zi_prev, xi_prev, prev_state + + if not return_labels: + return min_cost, D, path, x_dropped + else: + return labels
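+ + +# Minimal usage sketch (illustrative, with made-up numbers): +# costs = np.array([[0.1, 0.9, 0.9], +# [0.9, 0.1, 0.9]]) # K=2 steps, N=3 clips +# labels = drop_dtw(costs, np.full(3, 0.5), return_labels=True) +# # -> array([1., 2., 0.]): labels[i] is the 1-based step matched to clip i, and 0 +# # marks a dropped clip (here the last clip is cheaper to drop than to match)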
+ + +def double_drop_dtw( + pairwise_zx_costs, + x_drop_costs, + z_drop_costs, + contiguous=True, + one_to_many=True, + many_to_one=True, + return_labels=False, +): + """Drop-DTW algorithm that allows drops from both sequences. See Algorithm 1 in Appendix. + + Parameters + ---------- + pairwise_zx_costs: np.ndarray [K, N] + pairwise match costs between K steps and N video clips + x_drop_costs: np.ndarray [N] + drop costs for each clip + z_drop_costs: np.ndarray [K] + drop costs for each step + contiguous: bool + if True, can only match a contiguous sequence of clips to a step + (i.e. no drops in between the clips) + """
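+ # Illustrative call (made-up sizes): because drops are priced on both sides here, a +# step with no good clip match can itself be dropped at cost z_drop_costs[k]: +# labels = double_drop_dtw(np.random.rand(4, 12), np.full(12, 0.5), np.full(4, 0.5), +# return_labels=True) # labels[n] in {0, 1, ..., 4}, with 0 = clip n dropped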
+ K, N = pairwise_zx_costs.shape + + # initialize solution matrices + D = np.zeros([K + 1, N + 1, 4]) # the 4 dimensions are the following states: zx, z-, -x, -- + # no drops allowed in zx DP. Setting the same for all DPs to change later here. + D[1:, 0, :] = 99999999 + D[0, 1:, :] = 99999999 + D[0, 0, 1:] = 99999999 + # Allow to drop x in z- and -- + D[0, 1:, 1], D[0, 1:, 3] = np.cumsum(x_drop_costs), np.cumsum(x_drop_costs) + # Allow to drop z in -x and -- + D[1:, 0, 2], D[1:, 0, 3] = np.cumsum(z_drop_costs), np.cumsum(z_drop_costs) + + # initialize path tracking info for each of the 4 DP tables: + P = np.zeros([K + 1, N + 1, 4, 3], dtype=int) # (zi, xi, prev_state) + for zi in range(1, K + 1): + P[zi, 0, 2], P[zi, 0, 3] = (zi - 1, 0, 2), (zi - 1, 0, 3) + for xi in range(1, N + 1): + P[0, xi, 1], P[0, xi, 3] = (0, xi - 1, 1), (0, xi - 1, 3) + + # filling in the dynamic tables + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1, 2, 3] # zx, z-, -x, -- + diag_neigh_coords = [(zi - 1, xi - 1) for _ in diag_neigh_states] + diag_neigh_costs = [D[zi - 1, xi - 1, s] for s in diag_neigh_states] + + left_pos_neigh_states = [0, 1] # zx and z- + left_pos_neigh_coords = [(zi, xi - 1) for _ in left_pos_neigh_states] + left_pos_neigh_costs = [D[zi, xi - 1, s] for s in left_pos_neigh_states] + + top_pos_neigh_states = [0, 2] # zx and -x + top_pos_neigh_coords = [(zi - 1, xi) for _ in top_pos_neigh_states] + top_pos_neigh_costs = [D[zi - 1, xi, s] for s in top_pos_neigh_states] + + left_neg_neigh_states = [2, 3] # -x and -- + left_neg_neigh_coords = [(zi, xi - 1) for _ in left_neg_neigh_states] + left_neg_neigh_costs = [D[zi, xi - 1, s] for s in left_neg_neigh_states] + + top_neg_neigh_states = [1, 3] # z- and -- + top_neg_neigh_coords = [(zi - 1, xi) for _ in top_neg_neigh_states] + top_neg_neigh_costs = [D[zi - 1, xi, s] for s in top_neg_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexing in costs is shifted by 1 + + # DP 0: coming to zx + neigh_states_zx = diag_neigh_states + neigh_coords_zx = diag_neigh_coords + neigh_costs_zx = diag_neigh_costs + if one_to_many: + if contiguous: + neigh_states_zx.extend(left_pos_neigh_states[0:1]) + neigh_coords_zx.extend(left_pos_neigh_coords[0:1]) + neigh_costs_zx.extend(left_pos_neigh_costs[0:1]) + else: + neigh_states_zx.extend(left_pos_neigh_states) + neigh_coords_zx.extend(left_pos_neigh_coords) + neigh_costs_zx.extend(left_pos_neigh_costs) + if many_to_one: + neigh_states_zx.extend(top_pos_neigh_states) + neigh_coords_zx.extend(top_pos_neigh_coords) + neigh_costs_zx.extend(top_pos_neigh_costs) + + costs_zx = np.array(neigh_costs_zx) + pairwise_zx_costs[z_cost_ind, x_cost_ind] + opt_ind_zx = np.argmin(costs_zx) + P[zi, xi, 0] = *neigh_coords_zx[opt_ind_zx], neigh_states_zx[opt_ind_zx] + D[zi, xi, 0] = costs_zx[opt_ind_zx] + + # DP 1: coming to z- + neigh_states_z_ = left_pos_neigh_states + neigh_coords_z_ = left_pos_neigh_coords + neigh_costs_z_ = left_pos_neigh_costs + costs_z_ = np.array(neigh_costs_z_) + x_drop_costs[x_cost_ind] + opt_ind_z_ = np.argmin(costs_z_) + P[zi, xi, 1] =
*neigh_coords_z_[opt_ind_z_], neigh_states_z_[opt_ind_z_] + D[zi, xi, 1] = costs_z_[opt_ind_z_] + + # DP 2: coming to -x + neigh_states__x = top_pos_neigh_states + neigh_coords__x = top_pos_neigh_coords + neigh_costs__x = top_pos_neigh_costs + costs__x = np.array(neigh_costs__x) + z_drop_costs[z_cost_ind] + opt_ind__x = np.argmin(costs__x) + P[zi, xi, 2] = *neigh_coords__x[opt_ind__x], neigh_states__x[opt_ind__x] + D[zi, xi, 2] = costs__x[opt_ind__x] + + # DP 3: coming to -- + neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states) + # neigh_states___ = np.array(left_neg_neigh_states + top_neg_neigh_states + diag_neigh_states) + # adding negative left and top neighbors + neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords) + # neigh_coords___ = np.array(left_neg_neigh_coords + top_neg_neigh_coords + diag_neigh_coords) + costs___ = np.concatenate( + [ + left_neg_neigh_costs + x_drop_costs[x_cost_ind], + top_neg_neigh_costs + z_drop_costs[z_cost_ind], + # diag_neigh_costs + z_drop_costs[z_cost_ind] + x_drop_costs[x_cost_ind], + ], + 0, + ) + + opt_ind___ = costs___.argmin() + P[zi, xi, 3] = *neigh_coords___[opt_ind___], neigh_states___[opt_ind___] + D[zi, xi, 3] = costs___[opt_ind___] + + cur_state = D[K, N, :].argmin() + min_cost = D[K, N, cur_state] + + # unroll path + path = [] + zi, xi = K, N + x_dropped = [N] if cur_state in [1, 3] else [] + z_dropped = [K] if cur_state in [2, 3] else [] + while not (zi == 0 and xi == 0): + path.append((zi, xi)) + zi_prev, xi_prev, prev_state = P[zi, xi, cur_state] + if prev_state in [1, 3]: + x_dropped.append(xi_prev) + if prev_state in [2, 3]: + z_dropped.append(zi_prev) + zi, xi, cur_state = zi_prev, xi_prev, prev_state + + if return_labels: + labels = np.zeros(N) + for zi, xi in path: + if zi not in z_dropped and xi not in x_dropped: + labels[xi - 1] = zi + return labels + else: + return min_cost, path, x_dropped, z_dropped + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_left_pos, coord_left_neg = coord_left[..., [0, 1], :], coord_left[..., [2, 3], :] + coord_up_pos, coord_up_neg = coord_up[..., [0, 2], :], coord_up[..., [1, 3], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag] + coordinates_zx = [coord_diag] + if one_to_many: + neighbors_zx.append(neigh_left_pos[..., [0]] if contiguous else neigh_left) + coordinates_zx.append(coord_left_pos[..., [0], :] if contiguous else coord_left) + if many_to_one: + neighbors_zx.append(neigh_up_pos) + coordinates_zx.append(coord_up_pos) + diag_zx = list_min(neighbors_zx) + match_costs_diag + path_zx = list_min(coordinates_zx, keys=neighbors_zx) + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + coordinates_z_ = [coord_left_pos] + diag_z_ = list_min(neighbors_z_) + x_drop_costs_diag + path_z_ = list_min(coordinates_z_, keys=neighbors_z_) + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + coordinates__x = [coord_up_pos] + diag__x = list_min(neighbors__x) + z_drop_costs_diag + path__x = list_min(coordinates__x, keys=neighbors__x) + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], 
neigh_up_neg + z_drop_costs_diag[..., None]] + coordinates___ = [coord_left_neg, coord_up_neg] + diag___ = list_min(neighbors___) + path___ = list_min(coordinates___, neighbors___) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + path = torch.stack([path_zx, path_z_, path__x, path___], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 1], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = x1_dropcost[..., None] + diag_p_col = z1_dropcost[..., None] + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 1, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 1, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 1, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 1, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + + coord_up, coord_left, coord_diag = ( + coord_p[:, :-1].clone(), + coord_p[:, 1:].clone(), + coord_pp[:, pp_start : (pp_start + size)].clone(), + ) + # assign the right state to coordinates + coord_diag[..., 2] = 0 + coord_left[..., 2] = 1 + coord_up[..., 2] = 2 + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors = [ + neigh_diag + match_costs_diag[..., None], + neigh_left + x_drop_costs_diag[..., None], + neigh_up + z_drop_costs_diag[..., None], + ] + coordinates = [coord_diag, coord_left, coord_up] + diag = list_min(neighbors)[..., None] + path = (list_min(coordinates, keys=neighbors))[..., None, :] + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = x_drop_cost[..., None] + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(1) * (effective_d - 1), torch.zeros(1), torch.ones(1) * 1], dim=-1 + ) # [1, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 1, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = 
z_drop_cost[..., None] + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(1) * (effective_d - 1), torch.ones(1) * (last_r_p - 1), torch.ones(1) * 2], + dim=-1, + ) # [1, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 1, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 1, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + dc, rc, _ = coord_p[b, r][0] + tracebacks[orig_b.item()] = nw_diag_traceback(dc, rc, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def batch_drop_dtw_machine(zx_costs_list, x_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. 
The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + x1_dropcost = all_cum_x_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 2, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 2, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 2, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 2, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + coord_up_pos, coord_left_pos = coord_up[..., [0], :], coord_left[..., [0], :] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + pos_neighbors = [neigh_diag] + pos_coordinates = [coord_diag] + if one_to_many: + pos_neighbors.append(neigh_left_pos if contiguous else neigh_left) + pos_coordinates.append(coord_left_pos if contiguous else coord_left) + if many_to_one: + pos_neighbors.append(neigh_up) + pos_coordinates.append(coord_up) + diag_pos = list_min(pos_neighbors) + match_costs_diag + path_pos = list_min(pos_coordinates, keys=pos_neighbors) + + neg_neighbors = [neigh_left] + neg_coordinates = [coord_left] + diag_neg = list_min(neg_neighbors) + x_drop_costs_diag + path_neg = list_min(neg_coordinates, keys=neg_neighbors) + + diag = torch.stack([diag_pos, diag_neg], -1) + path = torch.stack([path_pos, path_neg], -2) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(2) 
* (effective_d - 1), torch.zeros(2), torch.arange(2)], dim=-1 + ) # [2, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 2, 3] + path = torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + pad = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(2) * (effective_d - 1), torch.ones(2) * (last_r_p - 1), torch.arange(2)], + dim=-1, + ) # [2, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 2, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_cum_x_drop_costs, batch_inf = [ + t[~mask] for t in [all_x_drop_costs, all_cum_x_drop_costs, batch_inf] + ] + all_paths = [p[~mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +def fast_batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, many_to_one=False, one_to_many=False, contiguous=True +): + # many_to_one is the same as not exclusive, i.e. multiple z match to one x + # one_to_many was always true by default before, i.e. 
multiple x match to one z + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + + # create routing masks for selection + # 4x3 corresponds to 4 states (zx, z-, -x, --) and 3 neighbors (l, d, u) + zx_mask = torch.zeros((4, 3)) + zx_mask[:, 1] = 1 + if one_to_many: + zx_mask[0, 0] = 1 + if not contiguous: + zx_mask[1, 0] = 1 + if many_to_one: + zx_mask[[0, 2], 2] = 1 + + z__mask = torch.zeros((4, 3)) + z__mask[[0, 1], 0] = 1 + + _x_mask = torch.zeros((4, 3)) + _x_mask[[0, 2], 2] = 1 + + ___mask = torch.zeros((4, 3)) + ___mask[[2, 3], 0] = 1 + ___mask[[1, 3], 2] = 1 + + mask = torch.stack([zx_mask, z__mask, _x_mask, ___mask], dim=-1).to(dev).to(dtype) # [4, 3, 4] + + def transition( + neigh_left, neigh_diag, neigh_up, coord_left, coord_diag, coord_up, match_costs, x_drop_costs, z_drop_costs + ): + all_neigh = torch.stack([neigh_left, neigh_diag, neigh_up], dim=-1) # [B, d, 4, 3] + all_coords = torch.stack([coord_left, coord_diag, coord_up], dim=-1).permute( + [0, 1, 3, 2, 4] + ) # [B, d, 3, 4, 3], the first 3 is the spatial dimension of coordinates + additions_zx = match_costs[..., None].repeat([1, 1, 3]) # [B, d, 3] + additions_z_ = x_drop_costs[..., None].repeat([1, 1, 3]) + additions__x = z_drop_costs[..., None].repeat([1, 1, 3]) + additions___ = torch.stack([x_drop_costs, match_costs, z_drop_costs], dim=-1) + additions = torch.stack([additions_zx, additions_z_, additions__x, additions___], dim=-1) # [B, d, 3, 4] + + inverse_mask = (~(mask[None, None, ...].to(bool))).to(dtype) + filtered_costs = all_neigh[..., None] * mask[None, None, ...] + inverse_mask * inf[0] # [B, d, 4, 3, 4] + full_costs = filtered_costs + additions[:, :, None, :, :] * mask[None, None, ...] 
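# The two masked sums above enumerate all (source state, neighbor) routes at
# once: `mask[s_from, neighbor, s_to]` flags which routes may feed each target
# state, `inverse_mask` pushes forbidden routes to +inf, and `additions`
# contributes the per-route step cost (match, x-drop, or z-drop). The min over
# the flattened 4x3 route axis below then selects, for every diagonal cell and
# every target state, the cheapest valid predecessor.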
+ B, d = full_costs.shape[:2] + the_min = full_costs.reshape([B, d, -1, 4]).min(dim=2) + new_diag = the_min.values + + all_coords = all_coords[..., None].repeat([1, 1, 1, 1, 1, 4]).reshape([B, d, 3, -1, 4]) + argmins = the_min.indices[:, :, None, None, :].repeat([1, 1, 3, 1, 1]) + pointers = torch.gather(all_coords, index=argmins, dim=-2) + pointers = pointers[:, :, :, 0, :].permute([0, 1, 3, 2]) + return new_diag, pointers + + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The path is also a diagonal representation that carries the optimal pathlength to each point + path_pp = torch.zeros([B, 1, 4, 3], device=dev, dtype=int) + path_p = torch.zeros([B, 2, 4, 3], device=dev, dtype=int) + all_paths = [path_pp, path_p] # going to store all the intermediate paths diagonals for the backtrack + + # Coords is also a diagonal representation that carries the current coordinates in [d, r] for each point + # the last dimension is 3 because it's [d, r, s], where d is a diagonal, r is element's order in the diagonal + # and s is statet (one of the 4) + coord_pp = get_diag_coord_grid(B, 1, 4, 0).to(dev) + coord_p = get_diag_coord_grid(B, 2, 4, 1).to(dev) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + tracebacks = [None for _ in range(B)] # going to store all the intermediate paths diagonals for the backtrack + + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + coord_up, coord_left, coord_diag = coord_p[:, :-1], coord_p[:, 1:], coord_pp[:, pp_start : (pp_start + size)] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + diag, path = transition( + neigh_left, + neigh_diag, + neigh_up, + coord_left, + coord_diag, + coord_up, + match_costs_diag, + x_drop_costs_diag, + z_drop_costs_diag, + ) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + effective_d = d + 2 # effective count of d is actually d + 2, since started with 2 + if d < N - 1: + # fill in 0th row of cost matrix with [inf, x_drop_cost, inf, x_drop_cost] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + cost_pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([cost_pad, diag], dim=1) + + # fill in 0th row of path matrix with the right pointers + left_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.zeros(4), torch.arange(4)], dim=-1 + ) # [4, 3] + left_pointer = ( + left_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) + ) # [B, 1, 4, 3] + path = 
torch.cat([left_pointer, path], 1) + if d < K - 1: + # fill in 0th col of cost matrix with [inf, inf, z_drop_cost, z_drop_cost] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], dim=1) + + # fill in 0th col of path matrix with the right pointers + + # the number of elements in the prev diagonal. Refers to 0th element of the column + last_r_p = diag_p.size(1) + up_pointer = torch.stack( + [torch.ones(4) * (effective_d - 1), torch.ones(4) * (last_r_p - 1), torch.arange(4)], + dim=-1, + ) # [4, 3] + up_pointer = up_pointer[None, None, ...].repeat([diag.size(0), 1, 1, 1]).to(dev).to(dtype) # [B, 1, 4, 3] + path = torch.cat([path, up_pointer], dim=1) + + all_paths.append(path) + + diag_pp = diag_p + diag_p = diag + + coord_pp = coord_p + coord_p = get_diag_coord_grid(diag.size(0), diag.size(1), 4, effective_d).to(dev) + + # process answers + if (Ds == d).any(): + local_mask, orig_mask = Ds == d, Ds_orig == d + original_bs = torch.nonzero(orig_mask, as_tuple=False)[:, 0] + bs, rs = torch.nonzero(local_mask, as_tuple=False)[:, 0], Rs[local_mask] + min_costs[orig_mask] = min_costs[orig_mask] + list_min([diag[bs, rs]]) + for orig_b, b, r in zip(original_bs, bs, rs): + # min_costs[orig_b] = min_costs[orig_b] + list_min([diag[b, r]]) + best_pointer = list_min([coord_p[b, r]], keys=[diag[b, r]]) + this_paths = [p[b.item()] for p in all_paths] + # current_N = Ns[orig_b.item()] + 1 + current_N = N + 1 + tracebacks[orig_b.item()] = diag_traceback(best_pointer, current_N, this_paths)[1] + + # filtering out already processed elements + diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs = [ + t[~local_mask] for t in [diag, diag_p, diag_pp, coord_p, coord_pp, path, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf = [ + t[~local_mask] + for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs, batch_inf] + ] + all_paths = [p[~local_mask] for p in all_paths] + + if torch.numel(Ds) == 0: + break + + return min_costs, tracebacks + + +if __name__ == '__main__': + zx_costs = np.random.rand(3, 4) # K=3 steps, N=4 clips + # zx_costs = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 1.0]]) + drop_costs = np.random.rand(4) + align = drop_dtw(zx_costs, drop_costs) + #breakpoint() diff --git a/yc2_univl/backup/pdvc/dp/soft_dp.py b/yc2_univl/backup/pdvc/dp/soft_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..9d5c17e5d5eeff50254dc7b8d31f6d43b253e388 --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/soft_dp.py @@ -0,0 +1,617 @@ +import numpy as np +import torch +import math +from torch import log, exp +import torch.nn.functional as F +from copy import copy + +from pdvc.dp.dp_utils import VarTable, minGamma, minProb, pad_costs, prob_min, unique_softmax, cosine_sim + + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def softDTW( + step_features, + frame_features, + labels, + dist_type="inner", + softning="prob", + gamma_min=0.1, + gamma_xz=0.1, + step_normalize=True, +): + """function to obtain a soft (differentiable) version of DTW + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the function + _min_fn = minProb if softning == "prob" else minGamma + min_fn = lambda x: _min_fn(x, gamma=gamma_min) + + # first get a pairwise distance matrix + if 
dist_type == "inner": + dist = step_features @ frame_features.T + else: + dist = cosine_sim(step_features, frame_features) + if step_normalize: + if labels is not None: + norm_dist = unique_softmax(dist, labels, gamma_xz) + else: + norm_dist = torch.softmax(dist / gamma_xz, 0) + dist = -log(norm_dist) + + # initialize soft-DTW table + nrows, ncols = dist.shape + # sdtw = torch.zeros((nrows+1,ncols+1)).to(torch.float).to(device) + sdtw = VarTable((nrows + 1, ncols + 1)) + for i in range(1, nrows + 1): + sdtw[i, 0] = 9999999999 + for j in range(1, ncols + 1): + sdtw[0, j] = 9999999999 + + # obtain dtw table using min_gamma or softMin relaxation + for i in range(1, nrows + 1): + for j in range(1, ncols + 1): + neighbors = torch.stack([sdtw[i, j - 1], sdtw[i - 1, j - 1], sdtw[i - 1, j]]) + di, dj = i - 1, j - 1 # in the distance matrix indices are shifted by one + new_val = dist[di, dj] + min_fn(neighbors) + sdtw[i, j] = torch.squeeze(new_val, 0) + sdtw_loss = sdtw[nrows, ncols] / step_features.shape[0] + return sdtw_loss, sdtw, dist + + +def dropDTW(zx_costs, drop_costs, softning="prob", exclusive=True, contiguous=True, gamma_min=1): + """function to obtain a soft (differentiable) version of Drop-DTW + zx_costs: matching costs of size K*N (K: number of steps, N: number of video frames) + drop_costs: costs of dropping each of the N frames + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + K, N = zx_costs.shape + exclusive = exclusive if K <= N else False + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3)) # a (K+1) x (N+1) table with 3 channels per cell: [final, drop, match] + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + cum_drop_costs[xi - 1] + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexing into costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e.
dropping xi + Dm = d_left + drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_cost = D[K, N][0] + return min_cost, D + + +def batch_dropDTW( + zx_costs_list, drop_costs_list, softning="prob", exclusive=True, contiguous=True, drop_mode="DropDTW", gamma_min=1 +): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + # defining the min function + min_fn = minProb if softning == "prob" else minGamma + inf = 9999999999 + + # pre-processing + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # preparing padded tables + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs = [], [], [] + for i in range(B): + zx_costs = zx_costs_list[i] + drop_costs = drop_costs_list[i] + cum_drop_costs = torch.cumsum(drop_costs, dim=0) + + # padding everything to the size of the largest N and K + row_pad = torch.zeros([N - Ns[i]]).to(zx_costs.device) + padded_cum_drop_costs.append(torch.cat([cum_drop_costs, row_pad])) + padded_drop_costs.append(torch.cat([drop_costs, row_pad])) + multirow_pad = torch.stack([row_pad + inf] * Ks[i], dim=0) + padded_table = torch.cat([zx_costs, multirow_pad], dim=1) + rest_pad = torch.zeros([K - Ks[i], N]).to(zx_costs.device) + inf + padded_table = torch.cat([padded_table, rest_pad], dim=0) + padded_zx_costs.append(padded_table) + + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 3, B)) # This corresponds to B 3-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + inf + for xi in range(1, N + 1): + if drop_mode == "DropDTW": + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + elif drop_mode == "OTAM": + D[0, xi] = torch.zeros_like(D[0, xi]) + else: # drop_mode == 'DTW' + D[0, xi] = torch.zeros_like(D[0, xi]) + inf + + # obtain dtw table using min_gamma or softMin relaxation + for zi in range(1, K + 1): + for xi in range(1, N + 1): + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + d_diag, d_left = D[zi - 1, xi - 1][0:1], D[zi, xi - 1][0:1] + dp_left, dp_up = D[zi, xi - 1][2:3], D[zi - 1, xi][2:3] + + if drop_mode == "DropDTW": + # positive transition, i.e. matching x_i to z_j + if contiguous: + pos_neighbors = [d_diag, dp_left] + else: + pos_neighbors = [d_diag, d_left] + if not exclusive: + pos_neighbors.append(dp_up) + + Dp = min_fn(pos_neighbors, gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + + # negative transition, i.e. 
dropping xi + Dm = d_left + all_drop_costs[x_cost_ind] + + # update final solution matrix + D_final = min_fn([Dm, Dp], gamma=gamma_min) + else: + d_right = D[zi - 1, xi][0:1] + D_final = Dm = Dp = ( + min_fn([d_diag, d_left, d_right], gamma=gamma_min) + all_zx_costs[z_cost_ind, x_cost_ind] + ) + D[zi, xi] = torch.cat([D_final, Dm, Dp], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + Ni, Ki = Ns[i], Ks[i] + min_cost_i = D[Ki, Ni][0, i] + min_costs.append(min_cost_i / Ni) + + return min_costs, D + + +def batch_double_dropDTW(zx_costs_list, drop_costs_list, gamma_min=1): + """function to obtain a soft (differentiable version of DTW) + embs1, embs2: embedding of size N*D and M*D (N and M : number of video frames + and D: dimensionality of of the embedding vector) + """ + min_fn = lambda x: minProb(x, gamma=gamma_min) + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + + # assuming sequences are the same length + B = len(zx_costs_list) + padded_cum_drop_costs, padded_drop_costs, padded_zx_costs, Ns, Ks = pad_costs(zx_costs_list, drop_costs_list) + all_zx_costs = torch.stack(padded_zx_costs, dim=-1) + all_cum_drop_costs = torch.stack(padded_cum_drop_costs, dim=-1) + all_drop_costs = torch.stack(padded_drop_costs, dim=-1) + N, K = max(Ns), max(Ks) + + # Creating and initializing DP tables + D = VarTable((K + 1, N + 1, 4, B), dtype, dev) # This corresponds to B 4-dim DP tables + for zi in range(1, K + 1): + D[zi, 0] = torch.zeros_like(D[zi, 0]) + all_cum_drop_costs[(zi - 1) : zi] + for xi in range(1, N + 1): + D[0, xi] = torch.zeros_like(D[0, xi]) + all_cum_drop_costs[(xi - 1) : xi] + + for zi in range(1, K + 1): + for xi in range(1, N + 1): + # define frequently met neighbors here + diag_neigh_states = [0, 1, 2, 3] # zx, z-, -x, -- + diag_neigh_costs = [D[zi - 1, xi - 1][s] for s in diag_neigh_states] + + left_neigh_states = [0, 1] # zx and z- + left_neigh_costs = [D[zi, xi - 1][s] for s in left_neigh_states] + + upper_neigh_states = [0, 2] # zx and -x + upper_neigh_costs = [D[zi - 1, xi][s] for s in upper_neigh_states] + + z_cost_ind, x_cost_ind = zi - 1, xi - 1 # indexind in costs is shifted by 1 + + # DP 0: coming to zx + neigh_costs_zx = diag_neigh_costs + upper_neigh_costs + left_neigh_costs + D0 = min_fn(neigh_costs_zx) + all_zx_costs[z_cost_ind, x_cost_ind] + + # DP 1: coming to z- + neigh_costs_z_ = left_neigh_costs + D1 = min_fn(neigh_costs_z_) + all_drop_costs[x_cost_ind] + + # DP 2: coming to -x + neigh_costs__x = upper_neigh_costs + D2 = min_fn(neigh_costs__x) + all_drop_costs[z_cost_ind] + + # DP 3: coming to -- + costs___ = [d + all_drop_costs[z_cost_ind] * 2 for d in diag_neigh_costs] + [ + D[zi, xi - 1][3] + all_drop_costs[x_cost_ind], + D[zi - 1, xi][3] + all_drop_costs[z_cost_ind], + ] + D3 = min_fn(costs___) + + D[zi, xi] = torch.cat([D0, D1, D2, D3], dim=0) + + # Computing the final min cost for the whole batch + min_costs = [] + for i in range(B): + min_cost_i = min_fn(D[K, N][:, i]) + min_costs.append(min_cost_i / N) + return min_costs, D + + +def drop_dtw_machine(zx_costs, drop_costs, gamma_min=1, exclusive=True, contiguous=True): + K, N = zx_costs.shape + dev = zx_costs.device + flipped_costs = torch.flip(zx_costs, [0]) # flip the cost matrix upside down + cum_drop_costs = torch.cumsum(drop_costs, dim=-1) + + # initialize first two contr diagonals + inf = torch.tensor([9999999999], device=dev, dtype=zx_costs.dtype) + diag_pp = torch.zeros([1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([1, 2], 
device=dev) * inf + diag_p_row = torch.stack([inf, cum_drop_costs[[0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 0) # diag at i-1 + + for i in range(K + N - 1): + size = diag_p.size(0) - 1 + pp_start = max(0, diag_pp.size(0) - diag_p.size(0)) + neigh_up, neigh_left, neigh_diag = diag_p[:-1], diag_p[1:], diag_pp[pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[:, [0]], neigh_left[:, [0]] + + # define match and drop cost vectors + match_costs_diag = torch.flip(torch.diag(flipped_costs, i + 1 - K), [-1]) + d_start, d_end = max(1 - K + i, 0), min(i, N - 1) + 1 + drop_costs_diag = torch.flip(drop_costs[d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + if not exclusive: + pos_neighbors.append(neigh_up_pos) + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if i < N - 1: + # fill in 0th row with [drop_cost, inf] + pad = torch.stack([inf, cum_drop_costs[[i + 1]]], -1) + diag = torch.cat([pad, diag]) + if i < K - 1: + # fill in 0th col with [inf, inf] + pad = torch.stack([inf, inf], -1) + diag = torch.cat([diag, pad]) + + diag_pp = diag_p + diag_p = diag + assert (diag.size(0) == 1) and (diag.size(1) == 2), f"Last diag shape is {diag.shape} instead of [1, 2]" + + cost = prob_min(diag, gamma_min) + return cost + + +def batch_drop_dtw_machine(zx_costs_list, drop_costs_list, gamma_min=1, exclusive=True, contiguous=True): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + # For samples where K > N, exclusive computation is not possible + shapes = [t.shape for t in zx_costs_list] + Ks, Ns = [s[0] for s in shapes], [s[1] for s in shapes] + N, K = max(Ns), max(Ks) + persample_exclusive = torch.tensor([Ni >= Ki for Ki, Ni in shapes]).to(dev) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # define costs in tensors + all_zx_costs = [F.pad(c, [0, N - c.shape[1], 0, K - c.shape[0]]) for c in zx_costs_list] + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0) + all_cum_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. Here, 0 is keep, 1 is drop. 
+ """ + # initialize first two contr diagonals + batch_inf, batch_ones = torch.stack([inf] * B, 0), torch.ones([B, 1], device=dev, dtype=dtype) + diag_pp = torch.zeros([B, 1, 2], device=dev) # diag at i-2 + diag_p_col = torch.ones([B, 1, 2], device=dev) * batch_inf[..., None] + diag_p_row = torch.stack([batch_inf, all_cum_drop_costs[:, [0]]], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + # The pathlength path is also a diagonal representation that carries the optimal pathlength to each point + with torch.no_grad(): + path_pp = torch.zeros([B, 1, 2], device=dev, dtype=dtype) + path_p = torch.ones([B, 2, 2], device=dev, dtype=dtype) + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) + path_lens = torch.zeros(B).to(dtype=dtype).to(device=dev) + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_up_pos, neigh_left_pos = neigh_up[..., [0]], neigh_left[..., [0]] + + neigh_path_up, neigh_path_left, neigh_path_diag = ( + path_p[:, :-1], + path_p[:, 1:], + path_pp[:, pp_start : (pp_start + size)], + ) + neigh_path_up_pos, neigh_path_left_pos = neigh_path_up[..., [0]], neigh_path_left[..., [0]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + d_start, d_end = max(1 - K + d, 0), min(d, N - 1) + 1 + drop_costs_diag = torch.flip(all_drop_costs[:, d_start:d_end], [-1]) + + # update positive and negative tables -> compute new diagonal + pos_neighbors = [neigh_diag, neigh_left_pos] if contiguous else [neigh_diag, neigh_left] + pos_path_neighbors = ( + [neigh_path_diag, neigh_path_left_pos] if contiguous else [neigh_path_diag, neigh_path_left] + ) + if exclusive and (~persample_exclusive).any(): + # apply non-exclusive rule for some batch elements, via masing out the exclusive elements with inf + masked_neigh_up_pos = neigh_up_pos + persample_exclusive[:, None, None] * batch_inf[:, None] + pos_neighbors.append(masked_neigh_up_pos) + + pos_path_neighbors.append(neigh_path_up_pos * (~persample_exclusive[:, None, None])) + elif not exclusive: + # apply standard non-exclusive rule to all batch elements + pos_neighbors.append(neigh_up_pos) + pos_path_neighbors.append(neigh_path_up_pos) + + # DP Table update + diag_pos = prob_min(pos_neighbors, gamma_min) + match_costs_diag + diag_neg = prob_min([neigh_left], gamma_min) + drop_costs_diag + diag = torch.stack([diag_pos, diag_neg], -1) + + # Path Table Update + with torch.no_grad(): + path_pos = prob_min(pos_path_neighbors, gamma_min, pos_neighbors) + 1 + path_neg = prob_min([neigh_path_left], gamma_min, [neigh_left]) + 1 + path = torch.stack([path_pos, path_neg], -1) + + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in DP table's 0th row with [drop_cost, inf] + pad_d = torch.stack([batch_inf, all_cum_drop_costs[:, [d + 1]]], -1) + diag = torch.cat([pad_d, diag], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = torch.stack([batch_inf, torch.zeros_like(batch_inf) + d], -1) + path = torch.cat([pad_p, path], 1) + + if d < K - 1: + # fill in DP table's 0th col with [inf, inf] + pad_d = torch.stack([batch_inf, batch_inf], -1) + diag = torch.cat([diag, pad_d], 1) + + # fill in Path table's 0th row with [d, inf] + pad_p = pad_d + path = torch.cat([path, pad_p], 1) + + diag_pp = diag_p + diag_p 
= diag + + path_pp = path_p + path_p = path + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + path_lens[orig_mask] = path_lens[orig_mask] + prob_min([path[bs, rs]], gamma_min, [diag[bs, rs]]) + + diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, path, path_p, path_pp, Ds, Rs, flipped_costs] + ] + all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive = [ + t[~mask] for t in [all_drop_costs, all_cum_drop_costs, batch_inf, persample_exclusive] + ] + if torch.numel(Ds) == 0: + break + + # costs = prob_min([diag], gamma_min) + costs_norm = min_costs / path_lens + return min_costs, path_lens + + +def batch_double_drop_dtw_machine( + zx_costs_list, x_drop_costs_list, z_drop_costs_list, gamma_min=1, exclusive=True, contiguous=True +): + dev, dtype = zx_costs_list[0].device, zx_costs_list[0].dtype + inf = torch.tensor([9999999999], device=dev, dtype=dtype) + B = len(zx_costs_list) + + Ns, Ks = [], [] + for i in range(B): + Ki, Ni = zx_costs_list[i].shape + if exclusive and Ki >= Ni: + # in case the number of steps is greater than the number of frames, + # duplicate every frame and let the drops do the job. + mult = math.ceil(Ki / Ni) + zx_costs_list[i] = torch.stack([zx_costs_list[i]] * mult, dim=-1).reshape([Ki, -1]) + x_drop_costs_list[i] = torch.stack([x_drop_costs_list[i]] * mult, dim=-1).reshape([-1]) + Ni *= mult + Ns.append(Ni) + Ks.append(Ki) + N, K = max(Ns), max(Ks) + + # transform endpoints into diagonal coordinates + Ds, Rs = torch.zeros(B).to(dev).to(int), torch.zeros(B).to(dev).to(int) + for i, (Ki, Ni) in enumerate(zip(Ks, Ns)): + Ds[i] = Ki + Ni - 2 + Rs[i] = min(Ds[i] + 2, N) - Ni + Ds_orig, Rs_orig = copy(Ds), copy(Rs) + + # special padding of costs to ensure that the path goest through the endpoint + all_zx_costs = [] + for i, c in enumerate(zx_costs_list): + c_inf_frame = F.pad(c, [0, 1, 0, 1], value=inf.item()) + mask = torch.ones_like(c_inf_frame) + mask[-1, -1] = 0 + c_pad = F.pad(c_inf_frame * mask, [0, N - c.shape[1] - 1, 0, K - c.shape[0] - 1]) + all_zx_costs.append(c_pad) + all_zx_costs = torch.stack(all_zx_costs, 0) + + all_x_drop_costs = torch.stack([F.pad(c, [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0) + all_cum_x_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, N - c.shape[0]], value=inf.item()) for c in x_drop_costs_list], 0 + ) + all_z_drop_costs = torch.stack([F.pad(c, [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0) + all_cum_z_drop_costs = torch.stack( + [F.pad(torch.cumsum(c, -1), [0, K - c.shape[0]], value=inf.item()) for c in z_drop_costs_list], 0 + ) + flipped_costs = torch.flip(all_zx_costs, [1]) # flip the cost matrix upside down + + """Rules for the diagonals: + dim1: batch dimension + dim2: the diagonal itself. The first element along this dim corresponds + to the top right element on the diagonal. The movement is from top right + to bottom left, like that / + dim3: Keep and Drop dimensions of the DP table. 
The dimensions are as follows: + {0: zx, 1: z-, 2: -x, 3: --} + """ + # initialize first two contr diagonals + batch_inf = torch.stack([inf] * B, 0) + diag_pp = torch.zeros([B, 1, 4], device=dev) # diag at i-2 + x1_dropcost, z1_dropcost = all_cum_x_drop_costs[:, [0]], all_cum_z_drop_costs[:, [0]] + diag_p_row = torch.stack([batch_inf, x1_dropcost, batch_inf, x1_dropcost], -1) + diag_p_col = torch.stack([batch_inf, batch_inf, z1_dropcost, z1_dropcost], -1) + diag_p = torch.cat([diag_p_row, diag_p_col], 1) # diag at i-1 + + min_costs = torch.zeros(B).to(dtype=dtype).to(device=dev) # for storing the solution for each element + for d in range(K + N - 1): + size = diag_p.size(1) - 1 + pp_start = 0 if d < N else 1 + neigh_up, neigh_left, neigh_diag = diag_p[:, :-1], diag_p[:, 1:], diag_pp[:, pp_start : (pp_start + size)] + neigh_left_pos, neigh_left_neg = neigh_left[..., [0, 1]], neigh_left[..., [2, 3]] + neigh_up_pos, neigh_up_neg = neigh_up[..., [0, 2]], neigh_up[..., [1, 3]] + + # define match and drop cost vectors + match_costs_diag = torch.stack( + [torch.flip(torch.diag(flipped_costs[j], d + 1 - K), [-1]) for j in range(flipped_costs.size(0))], 0 + ) + + x_d_start, x_d_end = max(d + 1 - K, 0), min(d, N - 1) + 1 + x_drop_costs_diag = torch.flip(all_x_drop_costs[:, x_d_start:x_d_end], [-1]) + z_d_start, z_d_end = max(d + 1 - N, 0), min(d, K - 1) + 1 + z_drop_costs_diag = all_z_drop_costs[:, z_d_start:z_d_end] + + # update positive and negative tables -> compute new diagonal + + # DP 0: coming to zx + neighbors_zx = [neigh_diag, neigh_left_pos[..., [0]]] if contiguous else [neigh_diag, neigh_left_pos] + if not exclusive: + neighbors_zx.append(neigh_up_pos) + diag_zx = prob_min(neighbors_zx, gamma_min) + match_costs_diag + + # DP 1: coming to z- + neighbors_z_ = [neigh_left_pos] + diag_z_ = prob_min(neighbors_z_, gamma_min) + x_drop_costs_diag + + # DP 2: coming to -x + neighbors__x = [neigh_up_pos] + diag__x = prob_min(neighbors__x, gamma_min) + z_drop_costs_diag + + # DP 3: coming to -- + neighbors___ = [neigh_left_neg + x_drop_costs_diag[..., None], neigh_up_neg + z_drop_costs_diag[..., None]] + diag___ = prob_min(neighbors___, gamma_min) + + # Aggregating all the dimensions of DP together + diag = torch.stack([diag_zx, diag_z_, diag__x, diag___], -1) + + # Haven't done below + # add the initialization values on the ends of diagonal if needed + if d < N - 1: + # fill in 0th row with [drop_cost, inf] + x_drop_cost = all_cum_x_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, x_drop_cost, batch_inf, x_drop_cost], -1) + diag = torch.cat([pad, diag], 1) + if d < K - 1: + # fill in 0th col with [inf, inf] + z_drop_cost = all_cum_z_drop_costs[:, [d + 1]] + pad = torch.stack([batch_inf, batch_inf, z_drop_cost, z_drop_cost], -1) + diag = torch.cat([diag, pad], 1) + + diag_pp = diag_p + diag_p = diag + + # process answers + if (Ds == d).any(): + mask, orig_mask = Ds == d, Ds_orig == d + bs, rs = torch.nonzero(mask, as_tuple=False)[:, 0], Rs[mask] + min_costs[orig_mask] = min_costs[orig_mask] + prob_min([diag[bs, rs]], gamma_min) + + # filtering out already processed elements + diag, diag_p, diag_pp, Ds, Rs, flipped_costs = [ + t[~mask] for t in [diag, diag_p, diag_pp, Ds, Rs, flipped_costs] + ] + all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs = [ + t[~mask] for t in [all_x_drop_costs, all_z_drop_costs, all_cum_x_drop_costs, all_cum_z_drop_costs] + ] + + if torch.numel(Ds) == 0: + break + + costs_norm = min_costs / torch.tensor(Ns).to(dev) + return costs_norm + + 
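For reference, the recurrence that both the table-based `dropDTW` above and the diagonal "machine" variants implement can be written as a plain O(K*N) double loop. The sketch below is not part of the repo: the function name `drop_dtw_reference` is mine, and it assumes the exclusive, contiguous setting (each frame matches at most one step, and a step's matched frames are consecutive), with a hard `min` in place of the soft `prob_min`/`minGamma` relaxation.

import numpy as np

def drop_dtw_reference(zx_costs, drop_costs):
    # zx_costs: [K, N] step-to-frame match costs; drop_costs: [N] per-frame drop costs
    K, N = zx_costs.shape
    inf = 1e10
    Dp = np.full((K + 1, N + 1), inf)  # best cost of a path ending in a match at (z, x)
    Dm = np.full((K + 1, N + 1), inf)  # best cost of a path ending in a drop of frame x
    D = np.full((K + 1, N + 1), inf)   # overall best cost
    D[0, 0] = 0.0
    D[0, 1:] = Dm[0, 1:] = np.cumsum(drop_costs)  # a prefix of dropped frames
    for z in range(1, K + 1):
        for x in range(1, N + 1):
            # match step z to frame x: the previous frame was either matched to
            # step z-1 (diagonal move) or to the same step z (contiguous one-to-many)
            Dp[z, x] = min(D[z - 1, x - 1], Dp[z, x - 1]) + zx_costs[z - 1, x - 1]
            # drop frame x, regardless of what happened before it
            Dm[z, x] = D[z, x - 1] + drop_costs[x - 1]
            D[z, x] = min(Dp[z, x], Dm[z, x])
    return D[K, N]

# e.g. drop_dtw_reference(np.random.rand(3, 8), np.full(8, 0.4))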
+if __name__ == "__main__": + from exact_dp import double_drop_dtw + + K, N = 7, 15 + zx_costs = torch.normal(torch.ones([K, N])) + x_drop_costs = zx_costs.mean(0) + z_drop_costs = zx_costs.mean(1) + + min_cost, *_ = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy()) + my_costs = batch_double_drop_dtw_machine([zx_costs], [x_drop_costs], [z_drop_costs], gamma_min=0) + print(my_costs * N, min_cost) diff --git a/yc2_univl/backup/pdvc/dp/visualization.py b/yc2_univl/backup/pdvc/dp/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..fed631a8979532253408fb402672eee0cc4a9a64 --- /dev/null +++ b/yc2_univl/backup/pdvc/dp/visualization.py @@ -0,0 +1,179 @@ +import io +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.pyplot import figure +from PIL import Image + + +# defining the colors and shapes +color_code = [ + "blue", + "orange", + "green", + "red", + "purple", + "brown", + "pink", + "grey", + "olive", + "cyan", + "lime", + "grey", + "firebrick", + "coral", + "chocolate", + "saddlebrown", + "bisque", + "goldenrod", + "gold", + "khaki", + "darkolivegreen", + "greenyellow", + "palegreen", + "springgreen", + "aquamarine", + "teal", + "deepskyblue", + "navy", + "mediumslateblue", + "royalblue", + "indigo", + "magenta", + "deeppink", + "crimson", + "violet", + "snow", + "lightgrey", + "wheat", + "dodgerblue", + "darkseagreen", +] +color_code = color_code * 10 +shape_code = ["o", "s", "P", "*", "h", ">", "X", "d", "D", "v", "<", "p"] +shape_code = shape_code * int(len(color_code) / len(shape_code) + 1) + +color_values = [] +for color in color_code: + _ = plt.fill([0, 0, 1, 1, 0], [0, 1, 1, 0, 0], color) + buf = io.BytesIO() + _ = plt.savefig(buf, format="png") + _ = plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + color_values.append(img[100, 300]) + +color_code_hex = [] +for color_value in color_values: + step_color_rgb = tuple([s.item() for s in color_value]) + color_code_hex.append("#%02x%02x%02x" % step_color_rgb) + + +def plot_alignment( + step_ids, frame_labels, step_colors, step_shapes, size=(15, 2), name="all_step_to_video", to_np=True, grid_on=True +): + N_steps = len(frame_labels) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + tick_freq = 50 if N_steps > 1500 else 20 + _ = plt.xticks(np.arange(0, N_steps, tick_freq)) + _ = plt.xlim(0, N_steps) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + + if grid_on: + _ = plt.grid() + else: + plt.plot(np.arange(len(frame_labels)), [1] * len(frame_labels), color="grey") + + for si, step_id in enumerate(step_ids): + time, val = [], [] + for i in range(N_steps): + if si + 1 == frame_labels[i]: + time.append(i) + val.append(1) + time, val = np.array(time), np.array(val) + _ = plt.plot(time, val, step_shapes[step_id], color=step_colors[step_id]) + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_step_to_video_alignment(corresp_mat, size=(15, 2)): + """corresp_mat is of shape [K, N], where K is num_steps, and N is video_len""" + step_ids = np.arange(corresp_mat.size(0)) + 1 + labels = corresp_mat.to(float).argmax(0) + 1 * corresp_mat.to(bool).any(0) + + K_present = corresp_mat.to(bool).any(1).to(int).sum().item() + name = f"Video Segmentation | {K_present} steps present" + return plot_alignment(step_ids, 
labels, color_code, shape_code, name=name, size=size) + + +def plot_similarities( + sim, + drop_line=None, + colors=None, + select=None, + color_offset=0, + do_legend=True, + name="", + size=(15, 2), + grid_on=True, + to_np=True, + linewidth=1, +): + colors = colors if colors is not None else color_code + K, N = sim.shape + select = select if select is not None else np.arange(K) + + plt.rcParams["figure.figsize"] = (size[0], size[1]) + ax = plt.subplot(1, 1, 1) + _ = ax.set_title(name) + + _ = plt.xticks(np.arange(0, N, 20)) + _ = plt.xlim(0, N) + _ = plt.tick_params(bottom=True, top=False, left=True, right=True, labelright=True) + if grid_on: + _ = plt.grid() + + for i in range(K): + if i in select: + _ = plt.plot(np.arange(N), sim[i], color=colors[i + color_offset], label=str(i), linewidth=linewidth) + + if drop_line is not None: + _ = plt.plot(np.arange(N), drop_line * np.ones(N), "--") + + if do_legend: + _ = plt.xlim(0, N + int(0.10 * N)) + plt.legend() + + if to_np: + buf = io.BytesIO() + plt.savefig(buf, format="png") + plt.close() + buf.seek(0) + img = np.array(Image.open(buf).convert("RGB")) + return img + else: + return plt + + +def plot_gt_seg(N, starts, ends, colors=None, shapes=None, name="GT Seg", clip_len=1, size=(15, 2), grid_on=True): + colors = colors if colors is not None else color_code + shapes = shapes if shapes is not None else shape_code + + K = len(starts) + labels = -np.ones(N) + for i in range(K): + s, e = int(starts[i]), int(ends[i]) + labels[s : e + 1] = i + step_ids = np.arange(K) + return plot_alignment(step_ids, labels, colors, shapes, to_np=False, name=name, size=size, grid_on=grid_on) diff --git a/yc2_univl/backup/pdvc/matcher.py b/yc2_univl/backup/pdvc/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..3311680756df6cf1efeed2bbe2ab55350525b4ce --- /dev/null +++ b/yc2_univl/backup/pdvc/matcher.py @@ -0,0 +1,446 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +from scipy.optimize import linear_sum_assignment +from torch import nn +import torch.nn.functional as F +from torch import log, exp +import numpy as np + +from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from pdvc.dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from pdvc.dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from pdvc.dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from pdvc.dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class HungarianMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + + For efficiency reasons, the targets don't include the no_object. 
Because of this, in general, + there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, + while the others are un-matched (and thus treated as non-objects). + """ + + def __init__(self, + cost_class: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0 # or cost_caption!=0, "all costs cant be 0" + # breakpoint() + + def forward(self, outputs, targets, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + # Also concat the target labels and boxes + tgt_ids = torch.cat([v["labels"] for v in targets]) + if self.use_pseudo_box and self.training: + # print('use pseudo box') + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + else: + tgt_bbox = torch.cat([v["boxes"] for v in targets]) + # print('use gt box') + + # Compute the classification cost. 
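# The cost below is the focal-loss-style classification cost from Deformable
# DETR's matcher: for each (prediction, target) pair it is the positive focal
# term minus the negative focal term, evaluated at the predicted probability
# of the target's class, so confident correct predictions receive a large
# negative (attractive) matching cost.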
+ # alpha = 0.25 + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + # breakpoint() + + # Compute the giou cost betwen boxes + try: + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + except: + print('out_bbox', out_bbox) + print('tgt_bbox', tgt_bbox) + breakpoint() + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix + # breakpoint() + try: # [100, 10], [100, 11], [100, 10] + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + except: + breakpoint() + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2]} + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + # print(self.cost_caption, cost_caption.var(dim=0), cost_caption.max(dim=0)[0] - cost_caption.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + + sizes = [len(v["boxes_pseudo"]) for v in targets] if self.use_pseudo_box else [len(v["boxes"]) for v in targets] + # pdb.set_trace() + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]]*m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j%sizes[ii], dtype=torch.int64)) for ii,(i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + if verbose: + print('------matching results:') + print(indices) + for indice in indices: + for i, j in zip(*indice): + print(out_bbox[i][0::2], tgt_bbox[j][0::2]) + print('-----topK scores:') + topk_indices = out_prob.topk(10, dim=0) + print(topk_indices) + for i,(v,ids) in enumerate(zip(*topk_indices)): + print('top {}'.format(i)) + s= '' + for name,cost in costs.items(): + s += name + ':{} '.format(cost[ids]) + print(s) + + return indices, rl_indices + +class DTWMatcher(nn.Module): + ''' + Drop_z: if True, then we drop both the x axis (query) and z axis (text) + One_to_many: multiple x match to one z + Many_to_one: multiple z match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=True, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, ouputs, targets, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed[0].device + # embarisingly, this is faster on CPU than on GPU! 
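# Consequently, the alignment below runs on CPU tensors; only the resulting
# correspondence matrices are moved back to `orig_device` at the end.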
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + # TODO: Add the classification cost the the alignment cost + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, [] + +class SimMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + based on the similarity bewteen text embedding and query embedding + """ + def __init__(self, + cost_class: float = 1, + cost_sim: float = 1, + cost_bbox: float = 1, + cost_giou: float = 1, + cost_alpha = 0.25, + cost_gamma = 2, + use_pseudo_box = False): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_sim = cost_sim + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + # self.cost_caption = cost_caption + 
self.cost_alpha = cost_alpha + self.cost_gamma = cost_gamma + self.use_pseudo_box = use_pseudo_box + + assert cost_class != 0 or cost_sim!=0, "all costs cannot be 0" + # breakpoint() + + def forward(self, outputs, targets, text_embed, event_embed, verbose=False, many_to_one=False): + """ Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + with torch.no_grad(): + bs, num_queries = outputs["pred_logits"].shape[:2] + + # We flatten to compute the cost matrices in a batch + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + tgt_ids = torch.cat([v["labels"] for v in targets]) + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Also concat the target labels and boxes + # breakpoint() + if self.use_pseudo_box: + tgt_bbox = torch.cat([v["boxes_pseudo"] for v in targets]) + # Compute the L1 cost between boxes + cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) + + # Compute the giou cost betwen boxes + cost_giou = -generalized_box_iou(box_cl_to_xy(out_bbox), + box_cl_to_xy(tgt_bbox)) + else: + cost_bbox = torch.zeros_like(cost_class) + cost_giou = torch.zeros_like(cost_class) + + # Compute the classification cost. 
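# NOTE: alpha, gamma and cost_class were already computed above with the same
# values; the recomputation below is redundant but harmless.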
+ # alpha = 0.25 + alpha = self.cost_alpha + gamma = self.cost_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + # breakpoint() + # Compute the similarity cost + cost_sim = compute_sim(text_embed, event_embed, l2_norm=True).permute(1,0) + cost_sim = torch.ones_like(cost_sim) - cost_sim + # breakpoint() + + # cost_caption = outputs['caption_costs'].flatten(0, 1) + + # Final cost matrix + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + self.cost_sim * cost_sim + + costs = {'cost_bbox': cost_bbox, + 'cost_class': cost_class, + 'cost_giou': cost_giou, + 'cost_sim': cost_sim, + # 'cost_caption': cost_caption, + 'out_bbox': out_bbox[:, 0::2], + } + + if verbose: + print('\n') + print(self.cost_bbox, cost_bbox.var(dim=0), cost_bbox.max(dim=0)[0] - cost_bbox.min(dim=0)[0]) + print(self.cost_class, cost_class.var(dim=0), cost_class.max(dim=0)[0] - cost_class.min(dim=0)[0]) + print(self.cost_giou, cost_giou.var(dim=0), cost_giou.max(dim=0)[0] - cost_giou.min(dim=0)[0]) + print(self.cost_sim, cost_sim.var(dim=0), cost_sim.max(dim=0)[0] - cost_sim.min(dim=0)[0]) + # print(self.cost_caption, cost_caption.var(dim=0), cost_caption.max(dim=0)[0] - cost_caption.min(dim=0)[0]) + + C = C.view(bs, num_queries, -1).cpu() + + sizes = [text_embed.size(0)] + # pdb.set_trace() + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + m2o_rate = 4 + rl_indices = [linear_sum_assignment(torch.cat([c[i]]*m2o_rate, -1)) for i, c in enumerate(C.split(sizes, -1))] + rl_indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j%sizes[ii], dtype=torch.int64)) for ii,(i, j) in + enumerate(rl_indices)] + + indices = [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] + + + return indices, rl_indices + +def build_matcher(args): + if args.matcher_type == 'DTW': + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + elif args.matcher_type == 'Sim': + return SimMatcher(cost_class=args.set_cost_class, + cost_sim=args.set_cost_sim, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + else: + return HungarianMatcher(cost_class=args.set_cost_class, + cost_bbox=args.set_cost_bbox, + cost_giou=args.set_cost_giou, + cost_alpha = args.cost_alpha, + cost_gamma = args.cost_gamma, + use_pseudo_box = args.use_pseudo_box + ) + + +def build_matcher_simple(): + #return DTWMatcher(keep_percentile=0.5) + return SimMatcher() + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/matcher_align.py b/yc2_univl/backup/pdvc/matcher_align.py new file mode 100644 index 0000000000000000000000000000000000000000..e9b93dce7e9ff252230fbb8f8bc2861ce3a16605 --- /dev/null +++ b/yc2_univl/backup/pdvc/matcher_align.py @@ -0,0 +1,154 @@ +# 
------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +import torch.nn.functional as F +from torch import log, exp +import numpy as np +from torch import nn +from scipy.optimize import linear_sum_assignment +# from misc.detr_utils.box_ops import box_cl_to_xy, generalized_box_iou + +# For matcher_align +from dp.soft_dp import batch_drop_dtw_machine, batch_double_drop_dtw_machine +from dp.exact_dp import batch_double_drop_dtw_machine as exact_batch_double_drop_dtw_machine +from dp.exact_dp import batch_drop_dtw_machine as exact_batch_drop_dtw_machine +from dp.exact_dp import fast_batch_double_drop_dtw_machine, batch_NW_machine +# from dp.gpu_nw import gpu_nw +from dp.dp_utils import compute_all_costs, compute_double_costs + + +def compute_sim(z, x, l2_norm): + if l2_norm: + return F.normalize(z, dim=1) @ F.normalize(x, dim=1).T + else: + return z @ x.T + +class DTWMatcher(nn.Module): + ''' + Drop_z: if True, then we drop both the x axis (query) and z axis (text) + One_to_many: multiple x match to one z + Many_to_one: multiple z match to one x + ''' + def __init__(self, + keep_percentile, + top_band_size=0, + given_droplines=None, + drop_z=False, + one_to_many=False, + many_to_one=False, + contiguous=False): + super().__init__() + self.keep_percentile = keep_percentile + self.top_band_size = top_band_size + self.given_droplines = given_droplines + self.drop_z = drop_z + self.one_to_many = one_to_many + self.many_to_one = many_to_one + self.contiguous = contiguous + + def forward(self, text_embed, event_embed): + # computing alignments (without gradients) + orig_device = event_embed.device + # embarisingly, this is faster on CPU than on GPU! 
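# As in pdvc/matcher.py, the DP alignment below runs on CPU copies of the
# similarity matrix; results are moved back to the original device afterwards.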
+ sims = compute_sim(text_embed, event_embed, l2_norm=True) + #sims = [s.cpu() for s in sims] + sims = [sims.cpu()] + self.given_droplines = None if self.given_droplines is None else [s.cpu() for s in self.given_droplines] + with torch.no_grad(): + zx_costs_list = [] + x_drop_costs_list = [] + z_drop_costs_list = [] + for i, sim in enumerate(sims): + # computing the baseline logit + top_sim = sim + if self.given_droplines is None: + if self.top_band_size > 0 and self.top_band_size < sim.shape[1]: + top_sim = sim.topk(self.top_band_size, dim=1).values + + if self.keep_percentile > 1: + dropline = top_sim.min() - 5 + else: + k = max([1, int(torch.numel(top_sim) * self.keep_percentile)]) + dropline = torch.topk(top_sim.reshape([-1]), k).values[-1].detach() + else: + dropline = self.given_droplines[i] + + # shift the costs by the drop logits, so I can set drop costs to 0 instead + zx_costs_list.append(dropline.reshape([1, 1]) - sim) + z_drop_cost = torch.zeros([sim.size(0)]).to(sim.device) + x_drop_cost = torch.zeros([sim.size(1)]).to(sim.device) + z_drop_costs_list.append(z_drop_cost) + x_drop_costs_list.append(x_drop_cost) + + # TODO figure out if one_to_many and many_to_one should be on + align_paths, corresp_mats = None, None + if self.drop_z: + if not (self.one_to_many or self.many_to_one): + _, align_paths = batch_NW_machine(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + # corresp_mats = gpu_nw(zx_costs_list, x_drop_costs_list, z_drop_costs_list) + else: + _, align_paths = exact_batch_double_drop_dtw_machine( + # _, align_paths = fast_batch_double_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + z_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + else: + _, align_paths = exact_batch_drop_dtw_machine( + zx_costs_list, + x_drop_costs_list, + one_to_many=self.one_to_many, + many_to_one=self.many_to_one, + contiguous=self.contiguous, + ) + + if corresp_mats is None: + corresp_matrices = [] + for b_id, sim in enumerate(sims): + corresp_matrix = torch.zeros_like(sim) + for i, j, s in align_paths[b_id]: + if s == 0: + corresp_matrix[i - 1, j - 1] = 1 + corresp_matrices.append(corresp_matrix.to(orig_device)) + # corresp_matrices.append(corresp_matrix) + text_indices = torch.stack([(torch.as_tensor(i-1, dtype=torch.int64)) for i, _, k in align_paths[-1] if k == 0]) + query_indices = torch.stack([(torch.as_tensor(j-1, dtype=torch.int64)) for _, j, k in align_paths[-1] if k == 0]) + text_indices, rearrange = torch.sort(text_indices) + query_indices = query_indices[rearrange] + indices = [(query_indices, text_indices)] + #return align_paths, corresp_matrices + return indices, _ + +def build_matcher(args): + return DTWMatcher(keep_percentile=args.align_keep_percentile, + top_band_size=args.align_top_band_size, + given_droplines=None, + drop_z=args.align_drop_z, + one_to_many=args.align_one_to_many, + many_to_one=args.align_many_to_one, + contiguous=args.align_contiguous) + + +def build_matcher_simple(): + return DTWMatcher(keep_percentile=0.5) + +if __name__ == '__main__': + text_embed = torch.rand(5, 128) + event_embed = torch.rand(15, 128) + #sim = torch.eye(3, 4) + aligner = build_matcher_simple() + indices, matrices = aligner(text_embed, event_embed) + breakpoint() diff --git a/yc2_univl/backup/pdvc/modules/UniVL_mini.py b/yc2_univl/backup/pdvc/modules/UniVL_mini.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9d6e960cc742b2eed92827f568734ae91073ce --- /dev/null +++ 
b/yc2_univl/backup/pdvc/modules/UniVL_mini.py @@ -0,0 +1,1292 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import copy +import math +import logging +import collections +import unicodedata +import os +from urllib.parse import urlparse +from typing import Optional, Tuple, Union, IO, Callable, Set +from pathlib import Path +import shutil +import tarfile +import tempfile +import json +from hashlib import sha256 +from functools import wraps +import boto3 +from botocore.exceptions import ClientError +import requests +from tqdm import tqdm + + +import torch +from torch import nn + + +logger = logging.getLogger(__name__) + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz", +} + +CONFIG_NAME = 'bert_config.json' +WEIGHTS_NAME = 'pytorch_model.bin' + +PRETRAINED_VOCAB_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + 'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", +} +PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = { + 'bert-base-uncased': 512, + 'bert-large-uncased': 512, + 'bert-base-cased': 512, + 'bert-large-cased': 512, + 'bert-base-multilingual-uncased': 512, + 'bert-base-multilingual-cased': 512, + 'bert-base-chinese': 512, +} +VOCAB_NAME = 'vocab.txt' + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with
open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. + """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. 
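+ # NamedTemporaryFile is deleted as soon as the handle closes, so the block + # below flushes and rewinds the handle, then copies the payload into the + # cache directory while the file is still open.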
+ with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting + wordpiece.""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")): + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + if token not in self.vocab: + ids.append(self.vocab["[UNK]"]) + logger.error("Cannot find token '{}' in vocab.
Using [UNK] instead".format(token)) + else: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + raise ValueError( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids into tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + @classmethod + def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + """ + Instantiate a BertTokenizer from a pre-trained vocabulary file. + Download and cache the pre-trained model file if needed. + """ + vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(vocab_file) is False: + if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP: + vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name] + else: + vocab_file = pretrained_model_name + if os.path.isdir(vocab_file): + vocab_file = os.path.join(vocab_file, VOCAB_NAME) + # redirect to the cache, if necessary + print(vocab_file) + try: + resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir) + except FileNotFoundError: + logger.error( + "Model name '{}' was not found. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated with this path or url.".format( + pretrained_model_name, + vocab_file)) + return None + if resolved_vocab_file == vocab_file: + logger.info("loading vocabulary file {}".format(vocab_file)) + else: + logger.info("loading vocabulary file {} from cache at {}".format( + vocab_file, resolved_vocab_file)) + if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP: + # if we're using a pretrained model, ensure the tokenizer won't index sequences longer + # than the number of positional embeddings + max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name] + kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len) + kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]") + + # Instantiate tokenizer. + tokenizer = cls(resolved_vocab_file, *inputs, **kwargs) + + return tokenizer + + def add_tokens(self, new_tokens, model): + """ + Add a list of new tokens to the tokenizer class. If the new tokens are not in the + vocabulary, they are added to it with indices starting from length of the current vocabulary. + Args: + new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assigns the index of the ``unk_token`` to them). + Returns: + Number of tokens added to the vocabulary. + Examples:: + # Let's see how to increase the vocabulary of Bert model and tokenizer + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) + print('We have added', num_added_toks, 'tokens') + model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expects to receive the full size of the new vocabulary, i.e. the length of the tokenizer.
+ """ + + to_add_tokens = [] + for token in new_tokens: + assert isinstance(token, str) + to_add_tokens.append(token) + # logger.info("Adding %s to the vocabulary", token) + + vocab = collections.OrderedDict() + for token in self.vocab.keys(): + vocab[token] = self.vocab[token] + for token in to_add_tokens: + vocab[token] = len(vocab) + self.vocab = self.wordpiece_tokenizer.vocab = vocab + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + + model.resize_token_embeddings(new_num_tokens=len(vocab)) + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. + """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `char` is a whitespace character.""" + # \t, \n, and \r are technically control characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `char` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `char` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False + + +def gelu(x): + """Implementation of the gelu activation function.
+ For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated with this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weights file doesn't exist.
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + +class BertConfig(PretrainedConfig): + """Configuration class to store the configuration of a `BertModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probability for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The stddev of the truncated_normal_initializer for + initializing all weight matrices.
+ """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int) " + "or the path to a pretrained model config file (str)") + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for downloading and loading pretrained models. + """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + # if not isinstance(config, PretrainedConfig): + # raise ValueError( + # "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + # "To create a model from a Google pretrained model use " + # "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + # self.__class__.__name__, self.__class__.__name__ + # )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in
module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). + """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Encoder Representations from Transformers"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary (see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + type indices selected in [0, 1].
Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controlled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated with the first token of the + input (`[CLS]`) to train on the Next-Sentence task (see BERT's paper). + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + def __init__(self, config): + super(BertModel, self).__init__(config) + self.config = config + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is simpler than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely.
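+ # Worked example: a mask of [1, 1, 0] becomes (1.0 - mask) * -10000.0 = + # [0, 0, -10000], so the padded position gets a vanishing probability after + # the softmax; a large negative constant is used instead of -inf so the + # arithmetic stays finite in fp16.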
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output + + +def build_UniVL_text_encoder(dict): + bert_config = BertConfig.from_dict(dict) + bert = BertModel(bert_config) + + return bert + +def build_UniVL_tokenizer(): + return BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) + + + +def load_pretrained_UniVL(args, device, n_gpu, local_rank, init_model=None): + + if init_model: + model_state_dict = torch.load(init_model, map_location='cpu') + else: + model_state_dict = None + + # Prepare model + cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed') + # NOTE: UniVL itself is provided by the full UniVL codebase; it is not defined in this mini module. + model = UniVL.from_pretrained('bert-base-uncased', 'visual-base', 'cross-base', 'decoder-base', + cache_dir=cache_dir, state_dict=model_state_dict, task_config=args) + + model.to(device) + + return model + +if __name__ == '__main__': + bert_config_dict = { + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 + } + tokenizer = build_UniVL_tokenizer() + bert = build_UniVL_text_encoder(bert_config_dict) + words = ["[CLS]"] + ['you', 'love', 'you'] + ["[SEP]"] + input_ids = torch.LongTensor([tokenizer.convert_tokens_to_ids(words)]) # batch of one token sequence + attention_mask = torch.ones_like(input_ids) + #masked_tokens = words.copy() + #masked_token_ids = tokenizer.convert_tokens_to_ids(masked_tokens) + token_type_ids = None + breakpoint() + encoded_layers, _ = bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True) + sequence_output = encoded_layers[-1] + diff --git a/yc2_univl/backup/pdvc/modules/__init__.py b/yc2_univl/backup/pdvc/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d34e0a02cf990fffc878b695beee9637074e33d0 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..874dd9210e523da3f66f8b15054a96cadeee908f Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/file_utils.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2030617dd39a30551bcda930768bb5af198af31 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/modeling.cpython-37.pyc differ diff --git
a/yc2_univl/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3218f01ae734e108885fd322ff9db4dc73b204fe Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_bert.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0585085b54395651e7fa6b8fb60d877f579733ce Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_cross.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f39ca45e9cc3f91242f4e039001dc5f6f2636af Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_decoder.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93a08af0acd6f720e525203d381fe91f1bc3b33f Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/module_visual.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6631deadc8c18f93e755eca7dd975b6ce83b6ca1 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/optimization.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5148122f4202468a675012274b2eead3a84a1510 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/tokenization.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff40d3cabb4bad221fc02eb703a9b76971c01709 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/until_config.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc b/yc2_univl/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64204d60a2c8da0639a86e05802791c7a65e4c17 Binary files /dev/null and b/yc2_univl/backup/pdvc/modules/__pycache__/until_module.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/modules/beam.py b/yc2_univl/backup/pdvc/modules/beam.py new file mode 100644 index 0000000000000000000000000000000000000000..eff1d961ef393e03a3c9105022b1047f5ea7133d --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/beam.py @@ -0,0 +1,116 @@ +""" +Manage beam search info structure. +Heavily borrowed from OpenNMT-py. 
For code in OpenNMT-py, please check the following link (possibly an older version): +https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py +""" + +import torch + +class Constants(): + def __init__(self): + self.PAD = 0 + self.UNK = 1 + self.BOS = 2 + self.EOS = 3 + self.PAD_WORD = '[PAD]' + self.UNK_WORD = '[UNK]' + self.BOS_WORD = '[CLS]' + self.EOS_WORD = '[SEP]' + + @classmethod + def from_tokenizer(cls, tokenizer): + instance = cls() + instance.PAD = tokenizer.vocab[instance.PAD_WORD] + instance.UNK = tokenizer.vocab[instance.UNK_WORD] + instance.BOS = tokenizer.vocab[instance.BOS_WORD] + instance.EOS = tokenizer.vocab[instance.EOS_WORD] + return instance + +class Beam(): + ''' Beam search ''' + + def __init__(self, size, device=False, tokenizer=None): + if tokenizer is None: + self.constants = Constants() + else: + self.constants = Constants.from_tokenizer(tokenizer) + + self.size = size + self._done = False + # The score for each hypothesis on the beam. + self.scores = torch.zeros((size,), dtype=torch.float, device=device) + self.all_scores = [] + + # The backpointers at each time-step. + self.prev_ks = [] + + # The outputs at each time-step. + self.next_ys = [torch.full((size,), self.constants.BOS, dtype=torch.long, device=device)] + + def get_current_state(self): + "Get the outputs for the current timestep." + return self.get_tentative_hypothesis() + + def get_current_origin(self): + "Get the backpointers for the current timestep." + return self.prev_ks[-1] + + @property + def done(self): + return self._done + + def advance(self, word_prob, word_length=None): + + "Update beam status and check if finished or not." + num_words = word_prob.size(1) + # Sum the previous scores. + if len(self.prev_ks) > 0: + beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) + else: + beam_lk = word_prob[0] + flat_beam_lk = beam_lk.view(-1) + best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) # 1st sort + self.all_scores.append(self.scores) + self.scores = best_scores + # bestScoresId is flattened as a (beam x word) array, + # so we need to calculate which word and beam each score came from + prev_k = best_scores_id // num_words + self.prev_ks.append(prev_k) + self.next_ys.append(best_scores_id - prev_k * num_words) + # End condition is when top-of-beam is EOS. + if self.next_ys[-1][0].item() == self.constants.EOS: + self._done = True + + return self._done + + def sort_scores(self): + "Sort the scores." + return torch.sort(self.scores, 0, True) + + def get_the_best_score_and_idx(self): + "Get the score of the best in the beam." + scores, ids = self.sort_scores() + return scores[1], ids[1] + + def get_tentative_hypothesis(self): + "Get the decoded sequence for the current timestep." + + if len(self.next_ys) == 1: + dec_seq = self.next_ys[0].unsqueeze(1) + else: + _, keys = self.sort_scores() + hyps = [self.get_hypothesis(k) for k in keys] + hyps = [[self.constants.BOS] + h for h in hyps] + dec_seq = torch.LongTensor(hyps) + + return dec_seq + + def get_hypothesis(self, k): + """ Walk back to construct the full hypothesis.
""" + hyp = [] + for j in range(len(self.prev_ks) - 1, -1, -1): + hyp.append(self.next_ys[j+1][k]) + k = self.prev_ks[j][k] + + return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/yc2_univl/backup/pdvc/modules/bert-base-uncased/bert_config.json b/yc2_univl/backup/pdvc/modules/bert-base-uncased/bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fca794a5f07ff8f963fe8b61e3694b0fb7f955df --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/bert-base-uncased/bert_config.json @@ -0,0 +1,13 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 +} diff --git a/yc2_univl/backup/pdvc/modules/bert-base-uncased/vocab.txt b/yc2_univl/backup/pdvc/modules/bert-base-uncased/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb140275c155a9c7c5a3b3e0e77a9e839594a938 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/bert-base-uncased/vocab.txt @@ -0,0 +1,30522 @@ +[PAD] +[unused0] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[unused11] +[unused12] +[unused13] +[unused14] +[unused15] +[unused16] +[unused17] +[unused18] +[unused19] +[unused20] +[unused21] +[unused22] +[unused23] +[unused24] +[unused25] +[unused26] +[unused27] +[unused28] +[unused29] +[unused30] +[unused31] +[unused32] +[unused33] +[unused34] +[unused35] +[unused36] +[unused37] +[unused38] +[unused39] +[unused40] +[unused41] +[unused42] +[unused43] +[unused44] +[unused45] +[unused46] +[unused47] +[unused48] +[unused49] +[unused50] +[unused51] +[unused52] +[unused53] +[unused54] +[unused55] +[unused56] +[unused57] +[unused58] +[unused59] +[unused60] +[unused61] +[unused62] +[unused63] +[unused64] +[unused65] +[unused66] +[unused67] +[unused68] +[unused69] +[unused70] +[unused71] +[unused72] +[unused73] +[unused74] +[unused75] +[unused76] +[unused77] +[unused78] +[unused79] +[unused80] +[unused81] +[unused82] +[unused83] +[unused84] +[unused85] +[unused86] +[unused87] +[unused88] +[unused89] +[unused90] +[unused91] +[unused92] +[unused93] +[unused94] +[unused95] +[unused96] +[unused97] +[unused98] +[UNK] +[CLS] +[SEP] +[MASK] +[unused99] +[unused100] +[unused101] +[unused102] +[unused103] +[unused104] +[unused105] +[unused106] +[unused107] +[unused108] +[unused109] +[unused110] +[unused111] +[unused112] +[unused113] +[unused114] +[unused115] +[unused116] +[unused117] +[unused118] +[unused119] +[unused120] +[unused121] +[unused122] +[unused123] +[unused124] +[unused125] +[unused126] +[unused127] +[unused128] +[unused129] +[unused130] +[unused131] +[unused132] +[unused133] +[unused134] +[unused135] +[unused136] +[unused137] +[unused138] +[unused139] +[unused140] +[unused141] +[unused142] +[unused143] +[unused144] +[unused145] +[unused146] +[unused147] +[unused148] +[unused149] +[unused150] +[unused151] +[unused152] +[unused153] +[unused154] +[unused155] +[unused156] +[unused157] +[unused158] +[unused159] +[unused160] +[unused161] +[unused162] +[unused163] +[unused164] +[unused165] +[unused166] +[unused167] +[unused168] +[unused169] +[unused170] +[unused171] +[unused172] +[unused173] +[unused174] +[unused175] +[unused176] +[unused177] +[unused178] +[unused179] +[unused180] +[unused181] +[unused182] +[unused183] +[unused184] +[unused185] +[unused186] 
+[unused187] +[unused188] +[unused189] +[unused190] +[unused191] +[unused192] +[unused193] +[unused194] +[unused195] +[unused196] +[unused197] +[unused198] +[unused199] +[unused200] +[unused201] +[unused202] +[unused203] +[unused204] +[unused205] +[unused206] +[unused207] +[unused208] +[unused209] +[unused210] +[unused211] +[unused212] +[unused213] +[unused214] +[unused215] +[unused216] +[unused217] +[unused218] +[unused219] +[unused220] +[unused221] +[unused222] +[unused223] +[unused224] +[unused225] +[unused226] +[unused227] +[unused228] +[unused229] +[unused230] +[unused231] +[unused232] +[unused233] +[unused234] +[unused235] +[unused236] +[unused237] +[unused238] +[unused239] +[unused240] +[unused241] +[unused242] +[unused243] +[unused244] +[unused245] +[unused246] +[unused247] +[unused248] +[unused249] +[unused250] +[unused251] +[unused252] +[unused253] +[unused254] +[unused255] +[unused256] +[unused257] +[unused258] +[unused259] +[unused260] +[unused261] +[unused262] +[unused263] +[unused264] +[unused265] +[unused266] +[unused267] +[unused268] +[unused269] +[unused270] +[unused271] +[unused272] +[unused273] +[unused274] +[unused275] +[unused276] +[unused277] +[unused278] +[unused279] +[unused280] +[unused281] +[unused282] +[unused283] +[unused284] +[unused285] +[unused286] +[unused287] +[unused288] +[unused289] +[unused290] +[unused291] +[unused292] +[unused293] +[unused294] +[unused295] +[unused296] +[unused297] +[unused298] +[unused299] +[unused300] +[unused301] +[unused302] +[unused303] +[unused304] +[unused305] +[unused306] +[unused307] +[unused308] +[unused309] +[unused310] +[unused311] +[unused312] +[unused313] +[unused314] +[unused315] +[unused316] +[unused317] +[unused318] +[unused319] +[unused320] +[unused321] +[unused322] +[unused323] +[unused324] +[unused325] +[unused326] +[unused327] +[unused328] +[unused329] +[unused330] +[unused331] +[unused332] +[unused333] +[unused334] +[unused335] +[unused336] +[unused337] +[unused338] +[unused339] +[unused340] +[unused341] +[unused342] +[unused343] +[unused344] +[unused345] +[unused346] +[unused347] +[unused348] +[unused349] +[unused350] +[unused351] +[unused352] +[unused353] +[unused354] +[unused355] +[unused356] +[unused357] +[unused358] +[unused359] +[unused360] +[unused361] +[unused362] +[unused363] +[unused364] +[unused365] +[unused366] +[unused367] +[unused368] +[unused369] +[unused370] +[unused371] +[unused372] +[unused373] +[unused374] +[unused375] +[unused376] +[unused377] +[unused378] +[unused379] +[unused380] +[unused381] +[unused382] +[unused383] +[unused384] +[unused385] +[unused386] +[unused387] +[unused388] +[unused389] +[unused390] +[unused391] +[unused392] +[unused393] +[unused394] +[unused395] +[unused396] +[unused397] +[unused398] +[unused399] +[unused400] +[unused401] +[unused402] +[unused403] +[unused404] +[unused405] +[unused406] +[unused407] +[unused408] +[unused409] +[unused410] +[unused411] +[unused412] +[unused413] +[unused414] +[unused415] +[unused416] +[unused417] +[unused418] +[unused419] +[unused420] +[unused421] +[unused422] +[unused423] +[unused424] +[unused425] +[unused426] +[unused427] +[unused428] +[unused429] +[unused430] +[unused431] +[unused432] +[unused433] +[unused434] +[unused435] +[unused436] +[unused437] +[unused438] +[unused439] +[unused440] +[unused441] +[unused442] +[unused443] +[unused444] +[unused445] +[unused446] +[unused447] +[unused448] +[unused449] +[unused450] +[unused451] +[unused452] +[unused453] +[unused454] +[unused455] +[unused456] +[unused457] +[unused458] +[unused459] 
+[unused460] +[unused461] +[unused462] +[unused463] +[unused464] +[unused465] +[unused466] +[unused467] +[unused468] +[unused469] +[unused470] +[unused471] +[unused472] +[unused473] +[unused474] +[unused475] +[unused476] +[unused477] +[unused478] +[unused479] +[unused480] +[unused481] +[unused482] +[unused483] +[unused484] +[unused485] +[unused486] +[unused487] +[unused488] +[unused489] +[unused490] +[unused491] +[unused492] +[unused493] +[unused494] +[unused495] +[unused496] +[unused497] +[unused498] +[unused499] +[unused500] +[unused501] +[unused502] +[unused503] +[unused504] +[unused505] +[unused506] +[unused507] +[unused508] +[unused509] +[unused510] +[unused511] +[unused512] +[unused513] +[unused514] +[unused515] +[unused516] +[unused517] +[unused518] +[unused519] +[unused520] +[unused521] +[unused522] +[unused523] +[unused524] +[unused525] +[unused526] +[unused527] +[unused528] +[unused529] +[unused530] +[unused531] +[unused532] +[unused533] +[unused534] +[unused535] +[unused536] +[unused537] +[unused538] +[unused539] +[unused540] +[unused541] +[unused542] +[unused543] +[unused544] +[unused545] +[unused546] +[unused547] +[unused548] +[unused549] +[unused550] +[unused551] +[unused552] +[unused553] +[unused554] +[unused555] +[unused556] +[unused557] +[unused558] +[unused559] +[unused560] +[unused561] +[unused562] +[unused563] +[unused564] +[unused565] +[unused566] +[unused567] +[unused568] +[unused569] +[unused570] +[unused571] +[unused572] +[unused573] +[unused574] +[unused575] +[unused576] +[unused577] +[unused578] +[unused579] +[unused580] +[unused581] +[unused582] +[unused583] +[unused584] +[unused585] +[unused586] +[unused587] +[unused588] +[unused589] +[unused590] +[unused591] +[unused592] +[unused593] +[unused594] +[unused595] +[unused596] +[unused597] +[unused598] +[unused599] +[unused600] +[unused601] +[unused602] +[unused603] +[unused604] +[unused605] +[unused606] +[unused607] +[unused608] +[unused609] +[unused610] +[unused611] +[unused612] +[unused613] +[unused614] +[unused615] +[unused616] +[unused617] +[unused618] +[unused619] +[unused620] +[unused621] +[unused622] +[unused623] +[unused624] +[unused625] +[unused626] +[unused627] +[unused628] +[unused629] +[unused630] +[unused631] +[unused632] +[unused633] +[unused634] +[unused635] +[unused636] +[unused637] +[unused638] +[unused639] +[unused640] +[unused641] +[unused642] +[unused643] +[unused644] +[unused645] +[unused646] +[unused647] +[unused648] +[unused649] +[unused650] +[unused651] +[unused652] +[unused653] +[unused654] +[unused655] +[unused656] +[unused657] +[unused658] +[unused659] +[unused660] +[unused661] +[unused662] +[unused663] +[unused664] +[unused665] +[unused666] +[unused667] +[unused668] +[unused669] +[unused670] +[unused671] +[unused672] +[unused673] +[unused674] +[unused675] +[unused676] +[unused677] +[unused678] +[unused679] +[unused680] +[unused681] +[unused682] +[unused683] +[unused684] +[unused685] +[unused686] +[unused687] +[unused688] +[unused689] +[unused690] +[unused691] +[unused692] +[unused693] +[unused694] +[unused695] +[unused696] +[unused697] +[unused698] +[unused699] +[unused700] +[unused701] +[unused702] +[unused703] +[unused704] +[unused705] +[unused706] +[unused707] +[unused708] +[unused709] +[unused710] +[unused711] +[unused712] +[unused713] +[unused714] +[unused715] +[unused716] +[unused717] +[unused718] +[unused719] +[unused720] +[unused721] +[unused722] +[unused723] +[unused724] +[unused725] +[unused726] +[unused727] +[unused728] +[unused729] +[unused730] +[unused731] +[unused732] 
+[unused733] +[unused734] +[unused735] +[unused736] +[unused737] +[unused738] +[unused739] +[unused740] +[unused741] +[unused742] +[unused743] +[unused744] +[unused745] +[unused746] +[unused747] +[unused748] +[unused749] +[unused750] +[unused751] +[unused752] +[unused753] +[unused754] +[unused755] +[unused756] +[unused757] +[unused758] +[unused759] +[unused760] +[unused761] +[unused762] +[unused763] +[unused764] +[unused765] +[unused766] +[unused767] +[unused768] +[unused769] +[unused770] +[unused771] +[unused772] +[unused773] +[unused774] +[unused775] +[unused776] +[unused777] +[unused778] +[unused779] +[unused780] +[unused781] +[unused782] +[unused783] +[unused784] +[unused785] +[unused786] +[unused787] +[unused788] +[unused789] +[unused790] +[unused791] +[unused792] +[unused793] +[unused794] +[unused795] +[unused796] +[unused797] +[unused798] +[unused799] +[unused800] +[unused801] +[unused802] +[unused803] +[unused804] +[unused805] +[unused806] +[unused807] +[unused808] +[unused809] +[unused810] +[unused811] +[unused812] +[unused813] +[unused814] +[unused815] +[unused816] +[unused817] +[unused818] +[unused819] +[unused820] +[unused821] +[unused822] +[unused823] +[unused824] +[unused825] +[unused826] +[unused827] +[unused828] +[unused829] +[unused830] +[unused831] +[unused832] +[unused833] +[unused834] +[unused835] +[unused836] +[unused837] +[unused838] +[unused839] +[unused840] +[unused841] +[unused842] +[unused843] +[unused844] +[unused845] +[unused846] +[unused847] +[unused848] +[unused849] +[unused850] +[unused851] +[unused852] +[unused853] +[unused854] +[unused855] +[unused856] +[unused857] +[unused858] +[unused859] +[unused860] +[unused861] +[unused862] +[unused863] +[unused864] +[unused865] +[unused866] +[unused867] +[unused868] +[unused869] +[unused870] +[unused871] +[unused872] +[unused873] +[unused874] +[unused875] +[unused876] +[unused877] +[unused878] +[unused879] +[unused880] +[unused881] +[unused882] +[unused883] +[unused884] +[unused885] +[unused886] +[unused887] +[unused888] +[unused889] +[unused890] +[unused891] +[unused892] +[unused893] +[unused894] +[unused895] +[unused896] +[unused897] +[unused898] +[unused899] +[unused900] +[unused901] +[unused902] +[unused903] +[unused904] +[unused905] +[unused906] +[unused907] +[unused908] +[unused909] +[unused910] +[unused911] +[unused912] +[unused913] +[unused914] +[unused915] +[unused916] +[unused917] +[unused918] +[unused919] +[unused920] +[unused921] +[unused922] +[unused923] +[unused924] +[unused925] +[unused926] +[unused927] +[unused928] +[unused929] +[unused930] +[unused931] +[unused932] +[unused933] +[unused934] +[unused935] +[unused936] +[unused937] +[unused938] +[unused939] +[unused940] +[unused941] +[unused942] +[unused943] +[unused944] +[unused945] +[unused946] +[unused947] +[unused948] +[unused949] +[unused950] +[unused951] +[unused952] +[unused953] +[unused954] +[unused955] +[unused956] +[unused957] +[unused958] +[unused959] +[unused960] +[unused961] +[unused962] +[unused963] +[unused964] +[unused965] +[unused966] +[unused967] +[unused968] +[unused969] +[unused970] +[unused971] +[unused972] +[unused973] +[unused974] +[unused975] +[unused976] +[unused977] +[unused978] +[unused979] +[unused980] +[unused981] +[unused982] +[unused983] +[unused984] +[unused985] +[unused986] +[unused987] +[unused988] +[unused989] +[unused990] +[unused991] +[unused992] +[unused993] +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? 
+@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¡ +¢ +£ +¤ +¥ +¦ +§ +¨ +© +ª +« +¬ +® +° +± +² +³ +´ +µ +¶ +· +¹ +º +» +¼ +½ +¾ +¿ +× +ß +æ +ð +÷ +ø +þ +đ +ħ +ı +ł +ŋ +œ +ƒ +ɐ +ɑ +ɒ +ɔ +ɕ +ə +ɛ +ɡ +ɣ +ɨ +ɪ +ɫ +ɬ +ɯ +ɲ +ɴ +ɹ +ɾ +ʀ +ʁ +ʂ +ʃ +ʉ +ʊ +ʋ +ʌ +ʎ +ʐ +ʑ +ʒ +ʔ +ʰ +ʲ +ʳ +ʷ +ʸ +ʻ +ʼ +ʾ +ʿ +ˈ +ː +ˡ +ˢ +ˣ +ˤ +α +β +γ +δ +ε +ζ +η +θ +ι +κ +λ +μ +ν +ξ +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +ђ +є +і +ј +љ +њ +ћ +ӏ +ա +բ +գ +դ +ե +թ +ի +լ +կ +հ +մ +յ +ն +ո +պ +ս +վ +տ +ր +ւ +ք +־ +א +ב +ג +ד +ה +ו +ז +ח +ט +י +ך +כ +ל +ם +מ +ן +נ +ס +ע +ף +פ +ץ +צ +ק +ר +ש +ת +، +ء +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +ز +س +ش +ص +ض +ط +ظ +ع +غ +ـ +ف +ق +ك +ل +م +ن +ه +و +ى +ي +ٹ +پ +چ +ک +گ +ں +ھ +ہ +ی +ے +अ +आ +उ +ए +क +ख +ग +च +ज +ट +ड +ण +त +थ +द +ध +न +प +ब +भ +म +य +र +ल +व +श +ष +स +ह +ा +ि +ी +ो +। +॥ +ং +অ +আ +ই +উ +এ +ও +ক +খ +গ +চ +ছ +জ +ট +ড +ণ +ত +থ +দ +ধ +ন +প +ব +ভ +ম +য +র +ল +শ +ষ +স +হ +া +ি +ী +ে +க +ச +ட +த +ந +ன +ப +ம +ய +ர +ல +ள +வ +ா +ி +ு +ே +ை +ನ +ರ +ಾ +ක +ය +ර +ල +ව +ා +ก +ง +ต +ท +น +พ +ม +ย +ร +ล +ว +ส +อ +า +เ +་ +། +ག +ང +ད +ན +པ +བ +མ +འ +ར +ལ +ས +မ +ა +ბ +გ +დ +ე +ვ +თ +ი +კ +ლ +მ +ნ +ო +რ +ს +ტ +უ +ᄀ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄉ +ᄊ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅥ +ᅦ +ᅧ +ᅩ +ᅪ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆼ +ᴬ +ᴮ +ᴰ +ᴵ +ᴺ +ᵀ +ᵃ +ᵇ +ᵈ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵖ +ᵗ +ᵘ +ᵢ +ᵣ +ᵤ +ᵥ +ᶜ +ᶠ +‐ +‑ +‒ +– +— +― +‖ +‘ +’ +‚ +“ +” +„ +† +‡ +• +… +‰ +′ +″ +› +‿ +⁄ +⁰ +ⁱ +⁴ +⁵ +⁶ +⁷ +⁸ +⁹ +⁺ +⁻ +ⁿ +₀ +₁ +₂ +₃ +₄ +₅ +₆ +₇ +₈ +₉ +₊ +₍ +₎ +ₐ +ₑ +ₒ +ₓ +ₕ +ₖ +ₗ +ₘ +ₙ +ₚ +ₛ +ₜ +₤ +₩ +€ +₱ +₹ +ℓ +№ +ℝ +™ +⅓ +⅔ +← +↑ +→ +↓ +↔ +↦ +⇄ +⇌ +⇒ +∂ +∅ +∆ +∇ +∈ +− +∗ +∘ +√ +∞ +∧ +∨ +∩ +∪ +≈ +≡ +≤ +≥ +⊂ +⊆ +⊕ +⊗ +⋅ +─ +│ +■ +▪ +● +★ +☆ +☉ +♠ +♣ +♥ +♦ +♭ +♯ +⟨ +⟩ +ⱼ +⺩ +⺼ +⽥ +、 +。 +〈 +〉 +《 +》 +「 +」 +『 +』 +〜 +あ +い +う +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +や +ゆ +よ +ら +り +る +れ +ろ +を +ん +ァ +ア +ィ +イ +ウ +ェ +エ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ュ +ョ +ラ +リ +ル +レ +ロ +ワ +ン +・ +ー +一 +三 +上 +下 +不 +世 +中 +主 +久 +之 +也 +事 +二 +五 +井 +京 +人 +亻 +仁 +介 +代 +仮 +伊 +会 +佐 +侍 +保 +信 +健 +元 +光 +八 +公 +内 +出 +分 +前 +劉 +力 +加 +勝 +北 +区 +十 +千 +南 +博 +原 +口 +古 +史 +司 +合 +吉 +同 +名 +和 +囗 +四 +国 +國 +土 +地 +坂 +城 +堂 +場 +士 +夏 +外 +大 +天 +太 +夫 +奈 +女 +子 +学 +宀 +宇 +安 +宗 +定 +宣 +宮 +家 +宿 +寺 +將 +小 +尚 +山 +岡 +島 +崎 +川 +州 +巿 +帝 +平 +年 +幸 +广 +弘 +張 +彳 +後 +御 +德 +心 +忄 +志 +忠 +愛 +成 +我 +戦 +戸 +手 +扌 +政 +文 +新 +方 +日 +明 +星 +春 +昭 +智 +曲 +書 +月 +有 +朝 +木 +本 +李 +村 +東 +松 +林 +森 +楊 +樹 +橋 +歌 +止 +正 +武 +比 +氏 +民 +水 +氵 +氷 +永 +江 +沢 +河 +治 +法 +海 +清 +漢 +瀬 +火 +版 +犬 +王 +生 +田 +男 +疒 +発 +白 +的 +皇 +目 +相 +省 +真 +石 +示 +社 +神 +福 +禾 +秀 +秋 +空 +立 +章 +竹 +糹 +美 +義 +耳 +良 +艹 +花 +英 +華 +葉 +藤 +行 +街 +西 +見 +訁 +語 +谷 +貝 +貴 +車 +軍 +辶 +道 +郎 +郡 +部 +都 +里 +野 +金 +鈴 +镇 +長 +門 +間 +阝 +阿 +陳 +陽 +雄 +青 +面 +風 +食 +香 +馬 +高 +龍 +龸 +fi +fl +! +( +) +, +- +. +/ +: +? 
+~ +the +of +and +in +to +was +he +is +as +for +on +with +that +it +his +by +at +from +her +##s +she +you +had +an +were +but +be +this +are +not +my +they +one +which +or +have +him +me +first +all +also +their +has +up +who +out +been +when +after +there +into +new +two +its +##a +time +would +no +what +about +said +we +over +then +other +so +more +##e +can +if +like +back +them +only +some +could +##i +where +just +##ing +during +before +##n +do +##o +made +school +through +than +now +years +most +world +may +between +down +well +three +##d +year +while +will +##ed +##r +##y +later +##t +city +under +around +did +such +being +used +state +people +part +know +against +your +many +second +university +both +national +##er +these +don +known +off +way +until +re +how +even +get +head +... +didn +##ly +team +american +because +de +##l +born +united +film +since +still +long +work +south +us +became +any +high +again +day +family +see +right +man +eyes +house +season +war +states +including +took +life +north +same +each +called +name +much +place +however +go +four +group +another +found +won +area +here +going +10 +away +series +left +home +music +best +make +hand +number +company +several +never +last +john +000 +very +album +take +end +good +too +following +released +game +played +little +began +district +##m +old +want +those +side +held +own +early +county +ll +league +use +west +##u +face +think +##es +2010 +government +##h +march +came +small +general +town +june +##on +line +based +something +##k +september +thought +looked +along +international +2011 +air +july +club +went +january +october +our +august +april +york +12 +few +2012 +2008 +east +show +member +college +2009 +father +public +##us +come +men +five +set +station +church +##c +next +former +november +room +party +located +december +2013 +age +got +2007 +##g +system +let +love +2006 +though +every +2014 +look +song +water +century +without +body +black +night +within +great +women +single +ve +building +large +population +river +named +band +white +started +##an +once +15 +20 +should +18 +2015 +service +top +built +british +open +death +king +moved +local +times +children +february +book +why +11 +door +need +president +order +final +road +wasn +although +due +major +died +village +third +knew +2016 +asked +turned +st +wanted +say +##p +together +received +main +son +served +different +##en +behind +himself +felt +members +power +football +law +voice +play +##in +near +park +history +30 +having +2005 +16 +##man +saw +mother +##al +army +point +front +help +english +street +art +late +hands +games +award +##ia +young +14 +put +published +country +division +across +told +13 +often +ever +french +london +center +six +red +2017 +led +days +include +light +25 +find +tell +among +species +really +according +central +half +2004 +form +original +gave +office +making +enough +lost +full +opened +must +included +live +given +german +player +run +business +woman +community +cup +might +million +land +2000 +court +development +17 +short +round +ii +km +seen +class +story +always +become +sure +research +almost +director +council +la +##2 +career +things +using +island +##z +couldn +car +##is +24 +close +force +##1 +better +free +support +control +field +students +2003 +education +married +##b +nothing +worked +others +record +big +inside +level +anything +continued +give +james +##3 +military +established +non +returned +feel +does +title +written +thing +feet +william +far +co +association +hard +already +2002 +##ra +championship 
+human +western +100 +##na +department +hall +role +various +production +21 +19 +heart +2001 +living +fire +version +##ers +##f +television +royal +##4 +produced +working +act +case +society +region +present +radio +period +looking +least +total +keep +england +wife +program +per +brother +mind +special +22 +##le +am +works +soon +##6 +political +george +services +taken +created +##7 +further +able +reached +david +union +joined +upon +done +important +social +information +either +##ic +##x +appeared +position +ground +lead +rock +dark +election +23 +board +france +hair +course +arms +site +police +girl +instead +real +sound +##v +words +moment +##te +someone +##8 +summer +project +announced +san +less +wrote +past +followed +##5 +blue +founded +al +finally +india +taking +records +america +##ne +1999 +design +considered +northern +god +stop +battle +toward +european +outside +described +track +today +playing +language +28 +call +26 +heard +professional +low +australia +miles +california +win +yet +green +##ie +trying +blood +##ton +southern +science +maybe +everything +match +square +27 +mouth +video +race +recorded +leave +above +##9 +daughter +points +space +1998 +museum +change +middle +common +##0 +move +tv +post +##ta +lake +seven +tried +elected +closed +ten +paul +minister +##th +months +start +chief +return +canada +person +sea +release +similar +modern +brought +rest +hit +formed +mr +##la +1997 +floor +event +doing +thomas +1996 +robert +care +killed +training +star +week +needed +turn +finished +railway +rather +news +health +sent +example +ran +term +michael +coming +currently +yes +forces +despite +gold +areas +50 +stage +fact +29 +dead +says +popular +2018 +originally +germany +probably +developed +result +pulled +friend +stood +money +running +mi +signed +word +songs +child +eventually +met +tour +average +teams +minutes +festival +current +deep +kind +1995 +decided +usually +eastern +seemed +##ness +episode +bed +added +table +indian +private +charles +route +available +idea +throughout +centre +addition +appointed +style +1994 +books +eight +construction +press +mean +wall +friends +remained +schools +study +##ch +##um +institute +oh +chinese +sometimes +events +possible +1992 +australian +type +brown +forward +talk +process +food +debut +seat +performance +committee +features +character +arts +herself +else +lot +strong +russian +range +hours +peter +arm +##da +morning +dr +sold +##ry +quickly +directed +1993 +guitar +china +##w +31 +list +##ma +performed +media +uk +players +smile +##rs +myself +40 +placed +coach +province +towards +wouldn +leading +whole +boy +official +designed +grand +census +##el +europe +attack +japanese +henry +1991 +##re +##os +cross +getting +alone +action +lower +network +wide +washington +japan +1990 +hospital +believe +changed +sister +##ar +hold +gone +sir +hadn +ship +##ka +studies +academy +shot +rights +below +base +bad +involved +kept +largest +##ist +bank +future +especially +beginning +mark +movement +section +female +magazine +plan +professor +lord +longer +##ian +sat +walked +hill +actually +civil +energy +model +families +size +thus +aircraft +completed +includes +data +captain +##or +fight +vocals +featured +richard +bridge +fourth +1989 +officer +stone +hear +##ism +means +medical +groups +management +self +lips +competition +entire +lived +technology +leaving +federal +tournament +bit +passed +hot +independent +awards +kingdom +mary +spent +fine +doesn +reported +##ling +jack +fall +raised +itself +stay +true +studio +1988 
+sports +replaced +paris +systems +saint +leader +theatre +whose +market +capital +parents +spanish +canadian +earth +##ity +cut +degree +writing +bay +christian +awarded +natural +higher +bill +##as +coast +provided +previous +senior +ft +valley +organization +stopped +onto +countries +parts +conference +queen +security +interest +saying +allowed +master +earlier +phone +matter +smith +winning +try +happened +moving +campaign +los +##ley +breath +nearly +mid +1987 +certain +girls +date +italian +african +standing +fell +artist +##ted +shows +deal +mine +industry +1986 +##ng +everyone +republic +provide +collection +library +student +##ville +primary +owned +older +via +heavy +1st +makes +##able +attention +anyone +africa +##ri +stated +length +ended +fingers +command +staff +skin +foreign +opening +governor +okay +medal +kill +sun +cover +job +1985 +introduced +chest +hell +feeling +##ies +success +meet +reason +standard +meeting +novel +1984 +trade +source +buildings +##land +rose +guy +goal +##ur +chapter +native +husband +previously +unit +limited +entered +weeks +producer +operations +mountain +takes +covered +forced +related +roman +complete +successful +key +texas +cold +##ya +channel +1980 +traditional +films +dance +clear +approximately +500 +nine +van +prince +question +active +tracks +ireland +regional +silver +author +personal +sense +operation +##ine +economic +1983 +holding +twenty +isbn +additional +speed +hour +edition +regular +historic +places +whom +shook +movie +km² +secretary +prior +report +chicago +read +foundation +view +engine +scored +1982 +units +ask +airport +property +ready +immediately +lady +month +listed +contract +##de +manager +themselves +lines +##ki +navy +writer +meant +##ts +runs +##ro +practice +championships +singer +glass +commission +required +forest +starting +culture +generally +giving +access +attended +test +couple +stand +catholic +martin +caught +executive +##less +eye +##ey +thinking +chair +quite +shoulder +1979 +hope +decision +plays +defeated +municipality +whether +structure +offered +slowly +pain +ice +direction +##ion +paper +mission +1981 +mostly +200 +noted +individual +managed +nature +lives +plant +##ha +helped +except +studied +computer +figure +relationship +issue +significant +loss +die +smiled +gun +ago +highest +1972 +##am +male +bring +goals +mexico +problem +distance +commercial +completely +location +annual +famous +drive +1976 +neck +1978 +surface +caused +italy +understand +greek +highway +wrong +hotel +comes +appearance +joseph +double +issues +musical +companies +castle +income +review +assembly +bass +initially +parliament +artists +experience +1974 +particular +walk +foot +engineering +talking +window +dropped +##ter +miss +baby +boys +break +1975 +stars +edge +remember +policy +carried +train +stadium +bar +sex +angeles +evidence +##ge +becoming +assistant +soviet +1977 +upper +step +wing +1970 +youth +financial +reach +##ll +actor +numerous +##se +##st +nodded +arrived +##ation +minute +##nt +believed +sorry +complex +beautiful +victory +associated +temple +1968 +1973 +chance +perhaps +metal +##son +1945 +bishop +##et +lee +launched +particularly +tree +le +retired +subject +prize +contains +yeah +theory +empire +##ce +suddenly +waiting +trust +recording +##to +happy +terms +camp +champion +1971 +religious +pass +zealand +names +2nd +port +ancient +tom +corner +represented +watch +legal +anti +justice +cause +watched +brothers +45 +material +changes +simply +response +louis +fast +##ting +answer +60 +historical 
+1969 +stories +straight +create +feature +increased +rate +administration +virginia +el +activities +cultural +overall +winner +programs +basketball +legs +guard +beyond +cast +doctor +mm +flight +results +remains +cost +effect +winter +##ble +larger +islands +problems +chairman +grew +commander +isn +1967 +pay +failed +selected +hurt +fort +box +regiment +majority +journal +35 +edward +plans +##ke +##ni +shown +pretty +irish +characters +directly +scene +likely +operated +allow +spring +##j +junior +matches +looks +mike +houses +fellow +##tion +beach +marriage +##ham +##ive +rules +oil +65 +florida +expected +nearby +congress +sam +peace +recent +iii +wait +subsequently +cell +##do +variety +serving +agreed +please +poor +joe +pacific +attempt +wood +democratic +piece +prime +##ca +rural +mile +touch +appears +township +1964 +1966 +soldiers +##men +##ized +1965 +pennsylvania +closer +fighting +claimed +score +jones +physical +editor +##ous +filled +genus +specific +sitting +super +mom +##va +therefore +supported +status +fear +cases +store +meaning +wales +minor +spain +tower +focus +vice +frank +follow +parish +separate +golden +horse +fifth +remaining +branch +32 +presented +stared +##id +uses +secret +forms +##co +baseball +exactly +##ck +choice +note +discovered +travel +composed +truth +russia +ball +color +kiss +dad +wind +continue +ring +referred +numbers +digital +greater +##ns +metres +slightly +direct +increase +1960 +responsible +crew +rule +trees +troops +##no +broke +goes +individuals +hundred +weight +creek +sleep +memory +defense +provides +ordered +code +value +jewish +windows +1944 +safe +judge +whatever +corps +realized +growing +pre +##ga +cities +alexander +gaze +lies +spread +scott +letter +showed +situation +mayor +transport +watching +workers +extended +##li +expression +normal +##ment +chart +multiple +border +##ba +host +##ner +daily +mrs +walls +piano +##ko +heat +cannot +##ate +earned +products +drama +era +authority +seasons +join +grade +##io +sign +difficult +machine +1963 +territory +mainly +##wood +stations +squadron +1962 +stepped +iron +19th +##led +serve +appear +sky +speak +broken +charge +knowledge +kilometres +removed +ships +article +campus +simple +##ty +pushed +britain +##ve +leaves +recently +cd +soft +boston +latter +easy +acquired +poland +##sa +quality +officers +presence +planned +nations +mass +broadcast +jean +share +image +influence +wild +offer +emperor +electric +reading +headed +ability +promoted +yellow +ministry +1942 +throat +smaller +politician +##by +latin +spoke +cars +williams +males +lack +pop +80 +##ier +acting +seeing +consists +##ti +estate +1961 +pressure +johnson +newspaper +jr +chris +olympics +online +conditions +beat +elements +walking +vote +##field +needs +carolina +text +featuring +global +block +shirt +levels +francisco +purpose +females +et +dutch +duke +ahead +gas +twice +safety +serious +turning +highly +lieutenant +firm +maria +amount +mixed +daniel +proposed +perfect +agreement +affairs +3rd +seconds +contemporary +paid +1943 +prison +save +kitchen +label +administrative +intended +constructed +academic +nice +teacher +races +1956 +formerly +corporation +ben +nation +issued +shut +1958 +drums +housing +victoria +seems +opera +1959 +graduated +function +von +mentioned +picked +build +recognized +shortly +protection +picture +notable +exchange +elections +1980s +loved +percent +racing +fish +elizabeth +garden +volume +hockey +1941 +beside +settled +##ford +1940 +competed +replied +drew +1948 +actress +marine 
+scotland +steel +glanced +farm +steve +1957 +risk +tonight +positive +magic +singles +effects +gray +screen +dog +##ja +residents +bus +sides +none +secondary +literature +polish +destroyed +flying +founder +households +1939 +lay +reserve +usa +gallery +##ler +1946 +industrial +younger +approach +appearances +urban +ones +1950 +finish +avenue +powerful +fully +growth +page +honor +jersey +projects +advanced +revealed +basic +90 +infantry +pair +equipment +visit +33 +evening +search +grant +effort +solo +treatment +buried +republican +primarily +bottom +owner +1970s +israel +gives +jim +dream +bob +remain +spot +70 +notes +produce +champions +contact +ed +soul +accepted +ways +del +##ally +losing +split +price +capacity +basis +trial +questions +##ina +1955 +20th +guess +officially +memorial +naval +initial +##ization +whispered +median +engineer +##ful +sydney +##go +columbia +strength +300 +1952 +tears +senate +00 +card +asian +agent +1947 +software +44 +draw +warm +supposed +com +pro +##il +transferred +leaned +##at +candidate +escape +mountains +asia +potential +activity +entertainment +seem +traffic +jackson +murder +36 +slow +product +orchestra +haven +agency +bbc +taught +website +comedy +unable +storm +planning +albums +rugby +environment +scientific +grabbed +protect +##hi +boat +typically +1954 +1953 +damage +principal +divided +dedicated +mount +ohio +##berg +pick +fought +driver +##der +empty +shoulders +sort +thank +berlin +prominent +account +freedom +necessary +efforts +alex +headquarters +follows +alongside +des +simon +andrew +suggested +operating +learning +steps +1949 +sweet +technical +begin +easily +34 +teeth +speaking +settlement +scale +##sh +renamed +ray +max +enemy +semi +joint +compared +##rd +scottish +leadership +analysis +offers +georgia +pieces +captured +animal +deputy +guest +organized +##lin +tony +combined +method +challenge +1960s +huge +wants +battalion +sons +rise +crime +types +facilities +telling +path +1951 +platform +sit +1990s +##lo +tells +assigned +rich +pull +##ot +commonly +alive +##za +letters +concept +conducted +wearing +happen +bought +becomes +holy +gets +ocean +defeat +languages +purchased +coffee +occurred +titled +##q +declared +applied +sciences +concert +sounds +jazz +brain +##me +painting +fleet +tax +nick +##ius +michigan +count +animals +leaders +episodes +##line +content +##den +birth +##it +clubs +64 +palace +critical +refused +fair +leg +laughed +returning +surrounding +participated +formation +lifted +pointed +connected +rome +medicine +laid +taylor +santa +powers +adam +tall +shared +focused +knowing +yards +entrance +falls +##wa +calling +##ad +sources +chosen +beneath +resources +yard +##ite +nominated +silence +zone +defined +##que +gained +thirty +38 +bodies +moon +##ard +adopted +christmas +widely +register +apart +iran +premier +serves +du +unknown +parties +##les +generation +##ff +continues +quick +fields +brigade +quiet +teaching +clothes +impact +weapons +partner +flat +theater +supreme +1938 +37 +relations +##tor +plants +suffered +1936 +wilson +kids +begins +##age +1918 +seats +armed +internet +models +worth +laws +400 +communities +classes +background +knows +thanks +quarter +reaching +humans +carry +killing +format +kong +hong +setting +75 +architecture +disease +railroad +inc +possibly +wish +arthur +thoughts +harry +doors +density +##di +crowd +illinois +stomach +tone +unique +reports +anyway +##ir +liberal +der +vehicle +thick +dry +drug +faced +largely +facility +theme +holds +creation +strange +colonel +##mi 
+revolution +bell +politics +turns +silent +rail +relief +independence +combat +shape +write +determined +sales +learned +4th +finger +oxford +providing +1937 +heritage +fiction +situated +designated +allowing +distribution +hosted +##est +sight +interview +estimated +reduced +##ria +toronto +footballer +keeping +guys +damn +claim +motion +sport +sixth +stayed +##ze +en +rear +receive +handed +twelve +dress +audience +granted +brazil +##well +spirit +##ated +noticed +etc +olympic +representative +eric +tight +trouble +reviews +drink +vampire +missing +roles +ranked +newly +household +finals +wave +critics +##ee +phase +massachusetts +pilot +unlike +philadelphia +bright +guns +crown +organizations +roof +42 +respectively +clearly +tongue +marked +circle +fox +korea +bronze +brian +expanded +sexual +supply +yourself +inspired +labour +fc +##ah +reference +vision +draft +connection +brand +reasons +1935 +classic +driving +trip +jesus +cells +entry +1920 +neither +trail +claims +atlantic +orders +labor +nose +afraid +identified +intelligence +calls +cancer +attacked +passing +stephen +positions +imperial +grey +jason +39 +sunday +48 +swedish +avoid +extra +uncle +message +covers +allows +surprise +materials +fame +hunter +##ji +1930 +citizens +figures +davis +environmental +confirmed +shit +titles +di +performing +difference +acts +attacks +##ov +existing +votes +opportunity +nor +shop +entirely +trains +opposite +pakistan +##pa +develop +resulted +representatives +actions +reality +pressed +##ish +barely +wine +conversation +faculty +northwest +ends +documentary +nuclear +stock +grace +sets +eat +alternative +##ps +bag +resulting +creating +surprised +cemetery +1919 +drop +finding +sarah +cricket +streets +tradition +ride +1933 +exhibition +target +ear +explained +rain +composer +injury +apartment +municipal +educational +occupied +netherlands +clean +billion +constitution +learn +1914 +maximum +classical +francis +lose +opposition +jose +ontario +bear +core +hills +rolled +ending +drawn +permanent +fun +##tes +##lla +lewis +sites +chamber +ryan +##way +scoring +height +1934 +##house +lyrics +staring +55 +officials +1917 +snow +oldest +##tic +orange +##ger +qualified +interior +apparently +succeeded +thousand +dinner +lights +existence +fans +heavily +41 +greatest +conservative +send +bowl +plus +enter +catch +##un +economy +duty +1929 +speech +authorities +princess +performances +versions +shall +graduate +pictures +effective +remembered +poetry +desk +crossed +starring +starts +passenger +sharp +##ant +acres +ass +weather +falling +rank +fund +supporting +check +adult +publishing +heads +cm +southeast +lane +##burg +application +bc +##ura +les +condition +transfer +prevent +display +ex +regions +earl +federation +cool +relatively +answered +besides +1928 +obtained +portion +##town +mix +##ding +reaction +liked +dean +express +peak +1932 +##tte +counter +religion +chain +rare +miller +convention +aid +lie +vehicles +mobile +perform +squad +wonder +lying +crazy +sword +##ping +attempted +centuries +weren +philosophy +category +##ize +anna +interested +47 +sweden +wolf +frequently +abandoned +kg +literary +alliance +task +entitled +##ay +threw +promotion +factory +tiny +soccer +visited +matt +fm +achieved +52 +defence +internal +persian +43 +methods +##ging +arrested +otherwise +cambridge +programming +villages +elementary +districts +rooms +criminal +conflict +worry +trained +1931 +attempts +waited +signal +bird +truck +subsequent +programme +##ol +ad +49 +communist +details +faith +sector 
+patrick +carrying +laugh +##ss +controlled +korean +showing +origin +fuel +evil +1927 +##ent +brief +identity +darkness +address +pool +missed +publication +web +planet +ian +anne +wings +invited +##tt +briefly +standards +kissed +##be +ideas +climate +causing +walter +worse +albert +articles +winners +desire +aged +northeast +dangerous +gate +doubt +1922 +wooden +multi +##ky +poet +rising +funding +46 +communications +communication +violence +copies +prepared +ford +investigation +skills +1924 +pulling +electronic +##ak +##ial +##han +containing +ultimately +offices +singing +understanding +restaurant +tomorrow +fashion +christ +ward +da +pope +stands +5th +flow +studios +aired +commissioned +contained +exist +fresh +americans +##per +wrestling +approved +kid +employed +respect +suit +1925 +angel +asking +increasing +frame +angry +selling +1950s +thin +finds +##nd +temperature +statement +ali +explain +inhabitants +towns +extensive +narrow +51 +jane +flowers +images +promise +somewhere +object +fly +closely +##ls +1912 +bureau +cape +1926 +weekly +presidential +legislative +1921 +##ai +##au +launch +founding +##ny +978 +##ring +artillery +strike +un +institutions +roll +writers +landing +chose +kevin +anymore +pp +##ut +attorney +fit +dan +billboard +receiving +agricultural +breaking +sought +dave +admitted +lands +mexican +##bury +charlie +specifically +hole +iv +howard +credit +moscow +roads +accident +1923 +proved +wear +struck +hey +guards +stuff +slid +expansion +1915 +cat +anthony +##kin +melbourne +opposed +sub +southwest +architect +failure +plane +1916 +##ron +map +camera +tank +listen +regarding +wet +introduction +metropolitan +link +ep +fighter +inch +grown +gene +anger +fixed +buy +dvd +khan +domestic +worldwide +chapel +mill +functions +examples +##head +developing +1910 +turkey +hits +pocket +antonio +papers +grow +unless +circuit +18th +concerned +attached +journalist +selection +journey +converted +provincial +painted +hearing +aren +bands +negative +aside +wondered +knight +lap +survey +ma +##ow +noise +billy +##ium +shooting +guide +bedroom +priest +resistance +motor +homes +sounded +giant +##mer +150 +scenes +equal +comic +patients +hidden +solid +actual +bringing +afternoon +touched +funds +wedding +consisted +marie +canal +sr +kim +treaty +turkish +recognition +residence +cathedral +broad +knees +incident +shaped +fired +norwegian +handle +cheek +contest +represent +##pe +representing +beauty +##sen +birds +advantage +emergency +wrapped +drawing +notice +pink +broadcasting +##ong +somehow +bachelor +seventh +collected +registered +establishment +alan +assumed +chemical +personnel +roger +retirement +jeff +portuguese +wore +tied +device +threat +progress +advance +##ised +banks +hired +manchester +nfl +teachers +structures +forever +##bo +tennis +helping +saturday +sale +applications +junction +hip +incorporated +neighborhood +dressed +ceremony +##ds +influenced +hers +visual +stairs +decades +inner +kansas +hung +hoped +gain +scheduled +downtown +engaged +austria +clock +norway +certainly +pale +protected +1913 +victor +employees +plate +putting +surrounded +##ists +finishing +blues +tropical +##ries +minnesota +consider +philippines +accept +54 +retrieved +1900 +concern +anderson +properties +institution +gordon +successfully +vietnam +##dy +backing +outstanding +muslim +crossing +folk +producing +usual +demand +occurs +observed +lawyer +educated +##ana +kelly +string +pleasure +budget +items +quietly +colorado +philip +typical +##worth +derived +600 +survived 
+asks +mental +##ide +56 +jake +jews +distinguished +ltd +1911 +sri +extremely +53 +athletic +loud +thousands +worried +shadow +transportation +horses +weapon +arena +importance +users +tim +objects +contributed +dragon +douglas +aware +senator +johnny +jordan +sisters +engines +flag +investment +samuel +shock +capable +clark +row +wheel +refers +session +familiar +biggest +wins +hate +maintained +drove +hamilton +request +expressed +injured +underground +churches +walker +wars +tunnel +passes +stupid +agriculture +softly +cabinet +regarded +joining +indiana +##ea +##ms +push +dates +spend +behavior +woods +protein +gently +chase +morgan +mention +burning +wake +combination +occur +mirror +leads +jimmy +indeed +impossible +singapore +paintings +covering +##nes +soldier +locations +attendance +sell +historian +wisconsin +invasion +argued +painter +diego +changing +egypt +##don +experienced +inches +##ku +missouri +vol +grounds +spoken +switzerland +##gan +reform +rolling +ha +forget +massive +resigned +burned +allen +tennessee +locked +values +improved +##mo +wounded +universe +sick +dating +facing +pack +purchase +user +##pur +moments +##ul +merged +anniversary +1908 +coal +brick +understood +causes +dynasty +queensland +establish +stores +crisis +promote +hoping +views +cards +referee +extension +##si +raise +arizona +improve +colonial +formal +charged +##rt +palm +lucky +hide +rescue +faces +95 +feelings +candidates +juan +##ell +goods +6th +courses +weekend +59 +luke +cash +fallen +##om +delivered +affected +installed +carefully +tries +swiss +hollywood +costs +lincoln +responsibility +##he +shore +file +proper +normally +maryland +assistance +jump +constant +offering +friendly +waters +persons +realize +contain +trophy +800 +partnership +factor +58 +musicians +cry +bound +oregon +indicated +hero +houston +medium +##ure +consisting +somewhat +##ara +57 +cycle +##che +beer +moore +frederick +gotten +eleven +worst +weak +approached +arranged +chin +loan +universal +bond +fifteen +pattern +disappeared +##ney +translated +##zed +lip +arab +capture +interests +insurance +##chi +shifted +cave +prix +warning +sections +courts +coat +plot +smell +feed +golf +favorite +maintain +knife +vs +voted +degrees +finance +quebec +opinion +translation +manner +ruled +operate +productions +choose +musician +discovery +confused +tired +separated +stream +techniques +committed +attend +ranking +kings +throw +passengers +measure +horror +fan +mining +sand +danger +salt +calm +decade +dam +require +runner +##ik +rush +associate +greece +##ker +rivers +consecutive +matthew +##ski +sighed +sq +documents +steam +edited +closing +tie +accused +1905 +##ini +islamic +distributed +directors +organisation +bruce +7th +breathing +mad +lit +arrival +concrete +taste +08 +composition +shaking +faster +amateur +adjacent +stating +1906 +twin +flew +##ran +tokyo +publications +##tone +obviously +ridge +storage +1907 +carl +pages +concluded +desert +driven +universities +ages +terminal +sequence +borough +250 +constituency +creative +cousin +economics +dreams +margaret +notably +reduce +montreal +mode +17th +ears +saved +jan +vocal +##ica +1909 +andy +##jo +riding +roughly +threatened +##ise +meters +meanwhile +landed +compete +repeated +grass +czech +regularly +charges +tea +sudden +appeal +##ung +solution +describes +pierre +classification +glad +parking +##ning +belt +physics +99 +rachel +add +hungarian +participate +expedition +damaged +gift +childhood +85 +fifty +##red +mathematics +jumped +letting +defensive +mph +##ux 
+##gh +testing +##hip +hundreds +shoot +owners +matters +smoke +israeli +kentucky +dancing +mounted +grandfather +emma +designs +profit +argentina +##gs +truly +li +lawrence +cole +begun +detroit +willing +branches +smiling +decide +miami +enjoyed +recordings +##dale +poverty +ethnic +gay +##bi +gary +arabic +09 +accompanied +##one +##ons +fishing +determine +residential +acid +##ary +alice +returns +starred +mail +##ang +jonathan +strategy +##ue +net +forty +cook +businesses +equivalent +commonwealth +distinct +ill +##cy +seriously +##ors +##ped +shift +harris +replace +rio +imagine +formula +ensure +##ber +additionally +scheme +conservation +occasionally +purposes +feels +favor +##and +##ore +1930s +contrast +hanging +hunt +movies +1904 +instruments +victims +danish +christopher +busy +demon +sugar +earliest +colony +studying +balance +duties +##ks +belgium +slipped +carter +05 +visible +stages +iraq +fifa +##im +commune +forming +zero +07 +continuing +talked +counties +legend +bathroom +option +tail +clay +daughters +afterwards +severe +jaw +visitors +##ded +devices +aviation +russell +kate +##vi +entering +subjects +##ino +temporary +swimming +forth +smooth +ghost +audio +bush +operates +rocks +movements +signs +eddie +##tz +ann +voices +honorary +06 +memories +dallas +pure +measures +racial +promised +66 +harvard +ceo +16th +parliamentary +indicate +benefit +flesh +dublin +louisiana +1902 +1901 +patient +sleeping +1903 +membership +coastal +medieval +wanting +element +scholars +rice +62 +limit +survive +makeup +rating +definitely +collaboration +obvious +##tan +boss +ms +baron +birthday +linked +soil +diocese +##lan +ncaa +##mann +offensive +shell +shouldn +waist +##tus +plain +ross +organ +resolution +manufacturing +adding +relative +kennedy +98 +whilst +moth +marketing +gardens +crash +72 +heading +partners +credited +carlos +moves +cable +##zi +marshall +##out +depending +bottle +represents +rejected +responded +existed +04 +jobs +denmark +lock +##ating +treated +graham +routes +talent +commissioner +drugs +secure +tests +reign +restored +photography +##gi +contributions +oklahoma +designer +disc +grin +seattle +robin +paused +atlanta +unusual +##gate +praised +las +laughing +satellite +hungary +visiting +##sky +interesting +factors +deck +poems +norman +##water +stuck +speaker +rifle +domain +premiered +##her +dc +comics +actors +01 +reputation +eliminated +8th +ceiling +prisoners +script +##nce +leather +austin +mississippi +rapidly +admiral +parallel +charlotte +guilty +tools +gender +divisions +fruit +##bs +laboratory +nelson +fantasy +marry +rapid +aunt +tribe +requirements +aspects +suicide +amongst +adams +bone +ukraine +abc +kick +sees +edinburgh +clothing +column +rough +gods +hunting +broadway +gathered +concerns +##ek +spending +ty +12th +snapped +requires +solar +bones +cavalry +##tta +iowa +drinking +waste +index +franklin +charity +thompson +stewart +tip +flash +landscape +friday +enjoy +singh +poem +listening +##back +eighth +fred +differences +adapted +bomb +ukrainian +surgery +corporate +masters +anywhere +##more +waves +odd +sean +portugal +orleans +dick +debate +kent +eating +puerto +cleared +96 +expect +cinema +97 +guitarist +blocks +electrical +agree +involving +depth +dying +panel +struggle +##ged +peninsula +adults +novels +emerged +vienna +metro +debuted +shoes +tamil +songwriter +meets +prove +beating +instance +heaven +scared +sending +marks +artistic +passage +superior +03 +significantly +shopping +##tive +retained +##izing +malaysia +technique +cheeks 
+##ola +warren +maintenance +destroy +extreme +allied +120 +appearing +##yn +fill +advice +alabama +qualifying +policies +cleveland +hat +battery +smart +authors +10th +soundtrack +acted +dated +lb +glance +equipped +coalition +funny +outer +ambassador +roy +possibility +couples +campbell +dna +loose +ethan +supplies +1898 +gonna +88 +monster +##res +shake +agents +frequency +springs +dogs +practices +61 +gang +plastic +easier +suggests +gulf +blade +exposed +colors +industries +markets +pan +nervous +electoral +charts +legislation +ownership +##idae +mac +appointment +shield +copy +assault +socialist +abbey +monument +license +throne +employment +jay +93 +replacement +charter +cloud +powered +suffering +accounts +oak +connecticut +strongly +wright +colour +crystal +13th +context +welsh +networks +voiced +gabriel +jerry +##cing +forehead +mp +##ens +manage +schedule +totally +remix +##ii +forests +occupation +print +nicholas +brazilian +strategic +vampires +engineers +76 +roots +seek +correct +instrumental +und +alfred +backed +hop +##des +stanley +robinson +traveled +wayne +welcome +austrian +achieve +67 +exit +rates +1899 +strip +whereas +##cs +sing +deeply +adventure +bobby +rick +jamie +careful +components +cap +useful +personality +knee +##shi +pushing +hosts +02 +protest +ca +ottoman +symphony +##sis +63 +boundary +1890 +processes +considering +considerable +tons +##work +##ft +##nia +cooper +trading +dear +conduct +91 +illegal +apple +revolutionary +holiday +definition +harder +##van +jacob +circumstances +destruction +##lle +popularity +grip +classified +liverpool +donald +baltimore +flows +seeking +honour +approval +92 +mechanical +till +happening +statue +critic +increasingly +immediate +describe +commerce +stare +##ster +indonesia +meat +rounds +boats +baker +orthodox +depression +formally +worn +naked +claire +muttered +sentence +11th +emily +document +77 +criticism +wished +vessel +spiritual +bent +virgin +parker +minimum +murray +lunch +danny +printed +compilation +keyboards +false +blow +belonged +68 +raising +78 +cutting +##board +pittsburgh +##up +9th +shadows +81 +hated +indigenous +jon +15th +barry +scholar +ah +##zer +oliver +##gy +stick +susan +meetings +attracted +spell +romantic +##ver +ye +1895 +photo +demanded +customers +##ac +1896 +logan +revival +keys +modified +commanded +jeans +##ious +upset +raw +phil +detective +hiding +resident +vincent +##bly +experiences +diamond +defeating +coverage +lucas +external +parks +franchise +helen +bible +successor +percussion +celebrated +il +lift +profile +clan +romania +##ied +mills +##su +nobody +achievement +shrugged +fault +1897 +rhythm +initiative +breakfast +carbon +700 +69 +lasted +violent +74 +wound +ken +killer +gradually +filmed +°c +dollars +processing +94 +remove +criticized +guests +sang +chemistry +##vin +legislature +disney +##bridge +uniform +escaped +integrated +proposal +purple +denied +liquid +karl +influential +morris +nights +stones +intense +experimental +twisted +71 +84 +##ld +pace +nazi +mitchell +ny +blind +reporter +newspapers +14th +centers +burn +basin +forgotten +surviving +filed +collections +monastery +losses +manual +couch +description +appropriate +merely +tag +missions +sebastian +restoration +replacing +triple +73 +elder +julia +warriors +benjamin +julian +convinced +stronger +amazing +declined +versus +merchant +happens +output +finland +bare +barbara +absence +ignored +dawn +injuries +##port +producers +##ram +82 +luis +##ities +kw +admit +expensive +electricity +nba +exception +symbol 
+##ving +ladies +shower +sheriff +characteristics +##je +aimed +button +ratio +effectively +summit +angle +jury +bears +foster +vessels +pants +executed +evans +dozen +advertising +kicked +patrol +1889 +competitions +lifetime +principles +athletics +##logy +birmingham +sponsored +89 +rob +nomination +1893 +acoustic +##sm +creature +longest +##tra +credits +harbor +dust +josh +##so +territories +milk +infrastructure +completion +thailand +indians +leon +archbishop +##sy +assist +pitch +blake +arrangement +girlfriend +serbian +operational +hence +sad +scent +fur +dj +sessions +hp +refer +rarely +##ora +exists +1892 +##ten +scientists +dirty +penalty +burst +portrait +seed +79 +pole +limits +rival +1894 +stable +alpha +grave +constitutional +alcohol +arrest +flower +mystery +devil +architectural +relationships +greatly +habitat +##istic +larry +progressive +remote +cotton +##ics +##ok +preserved +reaches +##ming +cited +86 +vast +scholarship +decisions +cbs +joy +teach +1885 +editions +knocked +eve +searching +partly +participation +gap +animated +fate +excellent +##ett +na +87 +alternate +saints +youngest +##ily +climbed +##ita +##tors +suggest +##ct +discussion +staying +choir +lakes +jacket +revenue +nevertheless +peaked +instrument +wondering +annually +managing +neil +1891 +signing +terry +##ice +apply +clinical +brooklyn +aim +catherine +fuck +farmers +figured +ninth +pride +hugh +evolution +ordinary +involvement +comfortable +shouted +tech +encouraged +taiwan +representation +sharing +##lia +##em +panic +exact +cargo +competing +fat +cried +83 +1920s +occasions +pa +cabin +borders +utah +marcus +##isation +badly +muscles +##ance +victorian +transition +warner +bet +permission +##rin +slave +terrible +similarly +shares +seth +uefa +possession +medals +benefits +colleges +lowered +perfectly +mall +transit +##ye +##kar +publisher +##ened +harrison +deaths +elevation +##ae +asleep +machines +sigh +ash +hardly +argument +occasion +parent +leo +decline +1888 +contribution +##ua +concentration +1000 +opportunities +hispanic +guardian +extent +emotions +hips +mason +volumes +bloody +controversy +diameter +steady +mistake +phoenix +identify +violin +##sk +departure +richmond +spin +funeral +enemies +1864 +gear +literally +connor +random +sergeant +grab +confusion +1865 +transmission +informed +op +leaning +sacred +suspended +thinks +gates +portland +luck +agencies +yours +hull +expert +muscle +layer +practical +sculpture +jerusalem +latest +lloyd +statistics +deeper +recommended +warrior +arkansas +mess +supports +greg +eagle +1880 +recovered +rated +concerts +rushed +##ano +stops +eggs +files +premiere +keith +##vo +delhi +turner +pit +affair +belief +paint +##zing +mate +##ach +##ev +victim +##ology +withdrew +bonus +styles +fled +##ud +glasgow +technologies +funded +nbc +adaptation +##ata +portrayed +cooperation +supporters +judges +bernard +justin +hallway +ralph +##ick +graduating +controversial +distant +continental +spider +bite +##ho +recognize +intention +mixing +##ese +egyptian +bow +tourism +suppose +claiming +tiger +dominated +participants +vi +##ru +nurse +partially +tape +##rum +psychology +##rn +essential +touring +duo +voting +civilian +emotional +channels +##king +apparent +hebrew +1887 +tommy +carrier +intersection +beast +hudson +##gar +##zo +lab +nova +bench +discuss +costa +##ered +detailed +behalf +drivers +unfortunately +obtain +##lis +rocky +##dae +siege +friendship +honey +##rian +1861 +amy +hang +posted +governments +collins +respond +wildlife +preferred +operator 
+##po +laura +pregnant +videos +dennis +suspected +boots +instantly +weird +automatic +businessman +alleged +placing +throwing +ph +mood +1862 +perry +venue +jet +remainder +##lli +##ci +passion +biological +boyfriend +1863 +dirt +buffalo +ron +segment +fa +abuse +##era +genre +thrown +stroke +colored +stress +exercise +displayed +##gen +struggled +##tti +abroad +dramatic +wonderful +thereafter +madrid +component +widespread +##sed +tale +citizen +todd +monday +1886 +vancouver +overseas +forcing +crying +descent +##ris +discussed +substantial +ranks +regime +1870 +provinces +switch +drum +zane +ted +tribes +proof +lp +cream +researchers +volunteer +manor +silk +milan +donated +allies +venture +principle +delivery +enterprise +##ves +##ans +bars +traditionally +witch +reminded +copper +##uk +pete +inter +links +colin +grinned +elsewhere +competitive +frequent +##oy +scream +##hu +tension +texts +submarine +finnish +defending +defend +pat +detail +1884 +affiliated +stuart +themes +villa +periods +tool +belgian +ruling +crimes +answers +folded +licensed +resort +demolished +hans +lucy +1881 +lion +traded +photographs +writes +craig +##fa +trials +generated +beth +noble +debt +percentage +yorkshire +erected +ss +viewed +grades +confidence +ceased +islam +telephone +retail +##ible +chile +m² +roberts +sixteen +##ich +commented +hampshire +innocent +dual +pounds +checked +regulations +afghanistan +sung +rico +liberty +assets +bigger +options +angels +relegated +tribute +wells +attending +leaf +##yan +butler +romanian +forum +monthly +lisa +patterns +gmina +##tory +madison +hurricane +rev +##ians +bristol +##ula +elite +valuable +disaster +democracy +awareness +germans +freyja +##ins +loop +absolutely +paying +populations +maine +sole +prayer +spencer +releases +doorway +bull +##ani +lover +midnight +conclusion +##sson +thirteen +lily +mediterranean +##lt +nhl +proud +sample +##hill +drummer +guinea +##ova +murphy +climb +##ston +instant +attributed +horn +ain +railways +steven +##ao +autumn +ferry +opponent +root +traveling +secured +corridor +stretched +tales +sheet +trinity +cattle +helps +indicates +manhattan +murdered +fitted +1882 +gentle +grandmother +mines +shocked +vegas +produces +##light +caribbean +##ou +belong +continuous +desperate +drunk +historically +trio +waved +raf +dealing +nathan +bat +murmured +interrupted +residing +scientist +pioneer +harold +aaron +##net +delta +attempting +minority +mini +believes +chorus +tend +lots +eyed +indoor +load +shots +updated +jail +##llo +concerning +connecting +wealth +##ved +slaves +arrive +rangers +sufficient +rebuilt +##wick +cardinal +flood +muhammad +whenever +relation +runners +moral +repair +viewers +arriving +revenge +punk +assisted +bath +fairly +breathe +lists +innings +illustrated +whisper +nearest +voters +clinton +ties +ultimate +screamed +beijing +lions +andre +fictional +gathering +comfort +radar +suitable +dismissed +hms +ban +pine +wrist +atmosphere +voivodeship +bid +timber +##ned +##nan +giants +##ane +cameron +recovery +uss +identical +categories +switched +serbia +laughter +noah +ensemble +therapy +peoples +touching +##off +locally +pearl +platforms +everywhere +ballet +tables +lanka +herbert +outdoor +toured +derek +1883 +spaces +contested +swept +1878 +exclusive +slight +connections +##dra +winds +prisoner +collective +bangladesh +tube +publicly +wealthy +thai +##ys +isolated +select +##ric +insisted +pen +fortune +ticket +spotted +reportedly +animation +enforcement +tanks +110 +decides +wider +lowest +owen +##time +nod 
+hitting +##hn +gregory +furthermore +magazines +fighters +solutions +##ery +pointing +requested +peru +reed +chancellor +knights +mask +worker +eldest +flames +reduction +1860 +volunteers +##tis +reporting +##hl +wire +advisory +endemic +origins +settlers +pursue +knock +consumer +1876 +eu +compound +creatures +mansion +sentenced +ivan +deployed +guitars +frowned +involves +mechanism +kilometers +perspective +shops +maps +terminus +duncan +alien +fist +bridges +##pers +heroes +fed +derby +swallowed +##ros +patent +sara +illness +characterized +adventures +slide +hawaii +jurisdiction +##op +organised +##side +adelaide +walks +biology +se +##ties +rogers +swing +tightly +boundaries +##rie +prepare +implementation +stolen +##sha +certified +colombia +edwards +garage +##mm +recalled +##ball +rage +harm +nigeria +breast +##ren +furniture +pupils +settle +##lus +cuba +balls +client +alaska +21st +linear +thrust +celebration +latino +genetic +terror +##cia +##ening +lightning +fee +witness +lodge +establishing +skull +##ique +earning +hood +##ei +rebellion +wang +sporting +warned +missile +devoted +activist +porch +worship +fourteen +package +1871 +decorated +##shire +housed +##ock +chess +sailed +doctors +oscar +joan +treat +garcia +harbour +jeremy +##ire +traditions +dominant +jacques +##gon +##wan +relocated +1879 +amendment +sized +companion +simultaneously +volleyball +spun +acre +increases +stopping +loves +belongs +affect +drafted +tossed +scout +battles +1875 +filming +shoved +munich +tenure +vertical +romance +pc +##cher +argue +##ical +craft +ranging +www +opens +honest +tyler +yesterday +virtual +##let +muslims +reveal +snake +immigrants +radical +screaming +speakers +firing +saving +belonging +ease +lighting +prefecture +blame +farmer +hungry +grows +rubbed +beam +sur +subsidiary +##cha +armenian +sao +dropping +conventional +##fer +microsoft +reply +qualify +spots +1867 +sweat +festivals +##ken +immigration +physician +discover +exposure +sandy +explanation +isaac +implemented +##fish +hart +initiated +connect +stakes +presents +heights +householder +pleased +tourist +regardless +slip +closest +##ction +surely +sultan +brings +riley +preparation +aboard +slammed +baptist +experiment +ongoing +interstate +organic +playoffs +##ika +1877 +130 +##tar +hindu +error +tours +tier +plenty +arrangements +talks +trapped +excited +sank +ho +athens +1872 +denver +welfare +suburb +athletes +trick +diverse +belly +exclusively +yelled +1868 +##med +conversion +##ette +1874 +internationally +computers +conductor +abilities +sensitive +hello +dispute +measured +globe +rocket +prices +amsterdam +flights +tigers +inn +municipalities +emotion +references +3d +##mus +explains +airlines +manufactured +pm +archaeological +1873 +interpretation +devon +comment +##ites +settlements +kissing +absolute +improvement +suite +impressed +barcelona +sullivan +jefferson +towers +jesse +julie +##tin +##lu +grandson +hi +gauge +regard +rings +interviews +trace +raymond +thumb +departments +burns +serial +bulgarian +scores +demonstrated +##ix +1866 +kyle +alberta +underneath +romanized +##ward +relieved +acquisition +phrase +cliff +reveals +han +cuts +merger +custom +##dar +nee +gilbert +graduation +##nts +assessment +cafe +difficulty +demands +swung +democrat +jennifer +commons +1940s +grove +##yo +completing +focuses +sum +substitute +bearing +stretch +reception +##py +reflected +essentially +destination +pairs +##ched +survival +resource +##bach +promoting +doubles +messages +tear +##down +##fully +parade +florence 
+harvey +incumbent +partial +framework +900 +pedro +frozen +procedure +olivia +controls +##mic +shelter +personally +temperatures +##od +brisbane +tested +sits +marble +comprehensive +oxygen +leonard +##kov +inaugural +iranian +referring +quarters +attitude +##ivity +mainstream +lined +mars +dakota +norfolk +unsuccessful +##° +explosion +helicopter +congressional +##sing +inspector +bitch +seal +departed +divine +##ters +coaching +examination +punishment +manufacturer +sink +columns +unincorporated +signals +nevada +squeezed +dylan +dining +photos +martial +manuel +eighteen +elevator +brushed +plates +ministers +ivy +congregation +##len +slept +specialized +taxes +curve +restricted +negotiations +likes +statistical +arnold +inspiration +execution +bold +intermediate +significance +margin +ruler +wheels +gothic +intellectual +dependent +listened +eligible +buses +widow +syria +earn +cincinnati +collapsed +recipient +secrets +accessible +philippine +maritime +goddess +clerk +surrender +breaks +playoff +database +##ified +##lon +ideal +beetle +aspect +soap +regulation +strings +expand +anglo +shorter +crosses +retreat +tough +coins +wallace +directions +pressing +##oon +shipping +locomotives +comparison +topics +nephew +##mes +distinction +honors +travelled +sierra +ibn +##over +fortress +sa +recognised +carved +1869 +clients +##dan +intent +##mar +coaches +describing +bread +##ington +beaten +northwestern +##ona +merit +youtube +collapse +challenges +em +historians +objective +submitted +virus +attacking +drake +assume +##ere +diseases +marc +stem +leeds +##cus +##ab +farming +glasses +##lock +visits +nowhere +fellowship +relevant +carries +restaurants +experiments +101 +constantly +bases +targets +shah +tenth +opponents +verse +territorial +##ira +writings +corruption +##hs +instruction +inherited +reverse +emphasis +##vic +employee +arch +keeps +rabbi +watson +payment +uh +##ala +nancy +##tre +venice +fastest +sexy +banned +adrian +properly +ruth +touchdown +dollar +boards +metre +circles +edges +favour +comments +ok +travels +liberation +scattered +firmly +##ular +holland +permitted +diesel +kenya +den +originated +##ral +demons +resumed +dragged +rider +##rus +servant +blinked +extend +torn +##ias +##sey +input +meal +everybody +cylinder +kinds +camps +##fe +bullet +logic +##wn +croatian +evolved +healthy +fool +chocolate +wise +preserve +pradesh +##ess +respective +1850 +##ew +chicken +artificial +gross +corresponding +convicted +cage +caroline +dialogue +##dor +narrative +stranger +mario +br +christianity +failing +trent +commanding +buddhist +1848 +maurice +focusing +yale +bike +altitude +##ering +mouse +revised +##sley +veteran +##ig +pulls +theology +crashed +campaigns +legion +##ability +drag +excellence +customer +cancelled +intensity +excuse +##lar +liga +participating +contributing +printing +##burn +variable +##rk +curious +bin +legacy +renaissance +##my +symptoms +binding +vocalist +dancer +##nie +grammar +gospel +democrats +ya +enters +sc +diplomatic +hitler +##ser +clouds +mathematical +quit +defended +oriented +##heim +fundamental +hardware +impressive +equally +convince +confederate +guilt +chuck +sliding +##ware +magnetic +narrowed +petersburg +bulgaria +otto +phd +skill +##ama +reader +hopes +pitcher +reservoir +hearts +automatically +expecting +mysterious +bennett +extensively +imagined +seeds +monitor +fix +##ative +journalism +struggling +signature +ranch +encounter +photographer +observation +protests +##pin +influences +##hr +calendar +##all +cruz +croatia 
+locomotive +hughes +naturally +shakespeare +basement +hook +uncredited +faded +theories +approaches +dare +phillips +filling +fury +obama +##ain +efficient +arc +deliver +min +raid +breeding +inducted +leagues +efficiency +axis +montana +eagles +##ked +supplied +instructions +karen +picking +indicating +trap +anchor +practically +christians +tomb +vary +occasional +electronics +lords +readers +newcastle +faint +innovation +collect +situations +engagement +160 +claude +mixture +##feld +peer +tissue +logo +lean +##ration +°f +floors +##ven +architects +reducing +##our +##ments +rope +1859 +ottawa +##har +samples +banking +declaration +proteins +resignation +francois +saudi +advocate +exhibited +armor +twins +divorce +##ras +abraham +reviewed +jo +temporarily +matrix +physically +pulse +curled +##ena +difficulties +bengal +usage +##ban +annie +riders +certificate +##pi +holes +warsaw +distinctive +jessica +##mon +mutual +1857 +customs +circular +eugene +removal +loaded +mere +vulnerable +depicted +generations +dame +heir +enormous +lightly +climbing +pitched +lessons +pilots +nepal +ram +google +preparing +brad +louise +renowned +##₂ +liam +##ably +plaza +shaw +sophie +brilliant +bills +##bar +##nik +fucking +mainland +server +pleasant +seized +veterans +jerked +fail +beta +brush +radiation +stored +warmth +southeastern +nate +sin +raced +berkeley +joke +athlete +designation +trunk +##low +roland +qualification +archives +heels +artwork +receives +judicial +reserves +##bed +woke +installation +abu +floating +fake +lesser +excitement +interface +concentrated +addressed +characteristic +amanda +saxophone +monk +auto +##bus +releasing +egg +dies +interaction +defender +ce +outbreak +glory +loving +##bert +sequel +consciousness +http +awake +ski +enrolled +##ress +handling +rookie +brow +somebody +biography +warfare +amounts +contracts +presentation +fabric +dissolved +challenged +meter +psychological +lt +elevated +rally +accurate +##tha +hospitals +undergraduate +specialist +venezuela +exhibit +shed +nursing +protestant +fluid +structural +footage +jared +consistent +prey +##ska +succession +reflect +exile +lebanon +wiped +suspect +shanghai +resting +integration +preservation +marvel +variant +pirates +sheep +rounded +capita +sailing +colonies +manuscript +deemed +variations +clarke +functional +emerging +boxing +relaxed +curse +azerbaijan +heavyweight +nickname +editorial +rang +grid +tightened +earthquake +flashed +miguel +rushing +##ches +improvements +boxes +brooks +180 +consumption +molecular +felix +societies +repeatedly +variation +aids +civic +graphics +professionals +realm +autonomous +receiver +delayed +workshop +militia +chairs +trump +canyon +##point +harsh +extending +lovely +happiness +##jan +stake +eyebrows +embassy +wellington +hannah +##ella +sony +corners +bishops +swear +cloth +contents +xi +namely +commenced +1854 +stanford +nashville +courage +graphic +commitment +garrison +##bin +hamlet +clearing +rebels +attraction +literacy +cooking +ruins +temples +jenny +humanity +celebrate +hasn +freight +sixty +rebel +bastard +##art +newton +##ada +deer +##ges +##ching +smiles +delaware +singers +##ets +approaching +assists +flame +##ph +boulevard +barrel +planted +##ome +pursuit +##sia +consequences +posts +shallow +invitation +rode +depot +ernest +kane +rod +concepts +preston +topic +chambers +striking +blast +arrives +descendants +montgomery +ranges +worlds +##lay +##ari +span +chaos +praise +##ag +fewer +1855 +sanctuary +mud +fbi +##ions +programmes +maintaining +unity +harper 
+bore +handsome +closure +tournaments +thunder +nebraska +linda +facade +puts +satisfied +argentine +dale +cork +dome +panama +##yl +1858 +tasks +experts +##ates +feeding +equation +##las +##ida +##tu +engage +bryan +##ax +um +quartet +melody +disbanded +sheffield +blocked +gasped +delay +kisses +maggie +connects +##non +sts +poured +creator +publishers +##we +guided +ellis +extinct +hug +gaining +##ord +complicated +##bility +poll +clenched +investigate +##use +thereby +quantum +spine +cdp +humor +kills +administered +semifinals +##du +encountered +ignore +##bu +commentary +##maker +bother +roosevelt +140 +plains +halfway +flowing +cultures +crack +imprisoned +neighboring +airline +##ses +##view +##mate +##ec +gather +wolves +marathon +transformed +##ill +cruise +organisations +carol +punch +exhibitions +numbered +alarm +ratings +daddy +silently +##stein +queens +colours +impression +guidance +liu +tactical +##rat +marshal +della +arrow +##ings +rested +feared +tender +owns +bitter +advisor +escort +##ides +spare +farms +grants +##ene +dragons +encourage +colleagues +cameras +##und +sucked +pile +spirits +prague +statements +suspension +landmark +fence +torture +recreation +bags +permanently +survivors +pond +spy +predecessor +bombing +coup +##og +protecting +transformation +glow +##lands +##book +dug +priests +andrea +feat +barn +jumping +##chen +##ologist +##con +casualties +stern +auckland +pipe +serie +revealing +ba +##bel +trevor +mercy +spectrum +yang +consist +governing +collaborated +possessed +epic +comprises +blew +shane +##ack +lopez +honored +magical +sacrifice +judgment +perceived +hammer +mtv +baronet +tune +das +missionary +sheets +350 +neutral +oral +threatening +attractive +shade +aims +seminary +##master +estates +1856 +michel +wounds +refugees +manufacturers +##nic +mercury +syndrome +porter +##iya +##din +hamburg +identification +upstairs +purse +widened +pause +cared +breathed +affiliate +santiago +prevented +celtic +fisher +125 +recruited +byzantine +reconstruction +farther +##mp +diet +sake +au +spite +sensation +##ert +blank +separation +105 +##hon +vladimir +armies +anime +##lie +accommodate +orbit +cult +sofia +archive +##ify +##box +founders +sustained +disorder +honours +northeastern +mia +crops +violet +threats +blanket +fires +canton +followers +southwestern +prototype +voyage +assignment +altered +moderate +protocol +pistol +##eo +questioned +brass +lifting +1852 +math +authored +##ual +doug +dimensional +dynamic +##san +1851 +pronounced +grateful +quest +uncomfortable +boom +presidency +stevens +relating +politicians +chen +barrier +quinn +diana +mosque +tribal +cheese +palmer +portions +sometime +chester +treasure +wu +bend +download +millions +reforms +registration +##osa +consequently +monitoring +ate +preliminary +brandon +invented +ps +eaten +exterior +intervention +ports +documented +log +displays +lecture +sally +favourite +##itz +vermont +lo +invisible +isle +breed +##ator +journalists +relay +speaks +backward +explore +midfielder +actively +stefan +procedures +cannon +blond +kenneth +centered +servants +chains +libraries +malcolm +essex +henri +slavery +##hal +facts +fairy +coached +cassie +cats +washed +cop +##fi +announcement +item +2000s +vinyl +activated +marco +frontier +growled +curriculum +##das +loyal +accomplished +leslie +ritual +kenny +##00 +vii +napoleon +hollow +hybrid +jungle +stationed +friedrich +counted +##ulated +platinum +theatrical +seated +col +rubber +glen +1840 +diversity +healing +extends +id +provisions +administrator 
+columbus +##oe +tributary +te +assured +org +##uous +prestigious +examined +lectures +grammy +ronald +associations +bailey +allan +essays +flute +believing +consultant +proceedings +travelling +1853 +kit +kerala +yugoslavia +buddy +methodist +##ith +burial +centres +batman +##nda +discontinued +bo +dock +stockholm +lungs +severely +##nk +citing +manga +##ugh +steal +mumbai +iraqi +robot +celebrity +bride +broadcasts +abolished +pot +joel +overhead +franz +packed +reconnaissance +johann +acknowledged +introduce +handled +doctorate +developments +drinks +alley +palestine +##nis +##aki +proceeded +recover +bradley +grain +patch +afford +infection +nationalist +legendary +##ath +interchange +virtually +gen +gravity +exploration +amber +vital +wishes +powell +doctrine +elbow +screenplay +##bird +contribute +indonesian +pet +creates +##com +enzyme +kylie +discipline +drops +manila +hunger +##ien +layers +suffer +fever +bits +monica +keyboard +manages +##hood +searched +appeals +##bad +testament +grande +reid +##war +beliefs +congo +##ification +##dia +si +requiring +##via +casey +1849 +regret +streak +rape +depends +syrian +sprint +pound +tourists +upcoming +pub +##xi +tense +##els +practiced +echo +nationwide +guild +motorcycle +liz +##zar +chiefs +desired +elena +bye +precious +absorbed +relatives +booth +pianist +##mal +citizenship +exhausted +wilhelm +##ceae +##hed +noting +quarterback +urge +hectares +##gue +ace +holly +##tal +blonde +davies +parked +sustainable +stepping +twentieth +airfield +galaxy +nest +chip +##nell +tan +shaft +paulo +requirement +##zy +paradise +tobacco +trans +renewed +vietnamese +##cker +##ju +suggesting +catching +holmes +enjoying +md +trips +colt +holder +butterfly +nerve +reformed +cherry +bowling +trailer +carriage +goodbye +appreciate +toy +joshua +interactive +enabled +involve +##kan +collar +determination +bunch +facebook +recall +shorts +superintendent +episcopal +frustration +giovanni +nineteenth +laser +privately +array +circulation +##ovic +armstrong +deals +painful +permit +discrimination +##wi +aires +retiring +cottage +ni +##sta +horizon +ellen +jamaica +ripped +fernando +chapters +playstation +patron +lecturer +navigation +behaviour +genes +georgian +export +solomon +rivals +swift +seventeen +rodriguez +princeton +independently +sox +1847 +arguing +entity +casting +hank +criteria +oakland +geographic +milwaukee +reflection +expanding +conquest +dubbed +##tv +halt +brave +brunswick +doi +arched +curtis +divorced +predominantly +somerset +streams +ugly +zoo +horrible +curved +buenos +fierce +dictionary +vector +theological +unions +handful +stability +chan +punjab +segments +##lly +altar +ignoring +gesture +monsters +pastor +##stone +thighs +unexpected +operators +abruptly +coin +compiled +associates +improving +migration +pin +##ose +compact +collegiate +reserved +##urs +quarterfinals +roster +restore +assembled +hurry +oval +##cies +1846 +flags +martha +##del +victories +sharply +##rated +argues +deadly +neo +drawings +symbols +performer +##iel +griffin +restrictions +editing +andrews +java +journals +arabia +compositions +dee +pierce +removing +hindi +casino +runway +civilians +minds +nasa +hotels +##zation +refuge +rent +retain +potentially +conferences +suburban +conducting +##tto +##tions +##tle +descended +massacre +##cal +ammunition +terrain +fork +souls +counts +chelsea +durham +drives +cab +##bank +perth +realizing +palestinian +finn +simpson +##dal +betty +##ule +moreover +particles +cardinals +tent +evaluation +extraordinary +##oid 
+inscription +##works +wednesday +chloe +maintains +panels +ashley +trucks +##nation +cluster +sunlight +strikes +zhang +##wing +dialect +canon +##ap +tucked +##ws +collecting +##mas +##can +##sville +maker +quoted +evan +franco +aria +buying +cleaning +eva +closet +provision +apollo +clinic +rat +##ez +necessarily +ac +##gle +##ising +venues +flipped +cent +spreading +trustees +checking +authorized +##sco +disappointed +##ado +notion +duration +trumpet +hesitated +topped +brussels +rolls +theoretical +hint +define +aggressive +repeat +wash +peaceful +optical +width +allegedly +mcdonald +strict +copyright +##illa +investors +mar +jam +witnesses +sounding +miranda +michelle +privacy +hugo +harmony +##pp +valid +lynn +glared +nina +102 +headquartered +diving +boarding +gibson +##ncy +albanian +marsh +routine +dealt +enhanced +er +intelligent +substance +targeted +enlisted +discovers +spinning +observations +pissed +smoking +rebecca +capitol +visa +varied +costume +seemingly +indies +compensation +surgeon +thursday +arsenal +westminster +suburbs +rid +anglican +##ridge +knots +foods +alumni +lighter +fraser +whoever +portal +scandal +##ray +gavin +advised +instructor +flooding +terrorist +##ale +teenage +interim +senses +duck +teen +thesis +abby +eager +overcome +##ile +newport +glenn +rises +shame +##cc +prompted +priority +forgot +bomber +nicolas +protective +360 +cartoon +katherine +breeze +lonely +trusted +henderson +richardson +relax +banner +candy +palms +remarkable +##rio +legends +cricketer +essay +ordained +edmund +rifles +trigger +##uri +##away +sail +alert +1830 +audiences +penn +sussex +siblings +pursued +indianapolis +resist +rosa +consequence +succeed +avoided +1845 +##ulation +inland +##tie +##nna +counsel +profession +chronicle +hurried +##una +eyebrow +eventual +bleeding +innovative +cure +##dom +committees +accounting +con +scope +hardy +heather +tenor +gut +herald +codes +tore +scales +wagon +##oo +luxury +tin +prefer +fountain +triangle +bonds +darling +convoy +dried +traced +beings +troy +accidentally +slam +findings +smelled +joey +lawyers +outcome +steep +bosnia +configuration +shifting +toll +brook +performers +lobby +philosophical +construct +shrine +aggregate +boot +cox +phenomenon +savage +insane +solely +reynolds +lifestyle +##ima +nationally +holdings +consideration +enable +edgar +mo +mama +##tein +fights +relegation +chances +atomic +hub +conjunction +awkward +reactions +currency +finale +kumar +underwent +steering +elaborate +gifts +comprising +melissa +veins +reasonable +sunshine +chi +solve +trails +inhabited +elimination +ethics +huh +ana +molly +consent +apartments +layout +marines +##ces +hunters +bulk +##oma +hometown +##wall +##mont +cracked +reads +neighbouring +withdrawn +admission +wingspan +damned +anthology +lancashire +brands +batting +forgive +cuban +awful +##lyn +104 +dimensions +imagination +##ade +dante +##ship +tracking +desperately +goalkeeper +##yne +groaned +workshops +confident +burton +gerald +milton +circus +uncertain +slope +copenhagen +sophia +fog +philosopher +portraits +accent +cycling +varying +gripped +larvae +garrett +specified +scotia +mature +luther +kurt +rap +##kes +aerial +750 +ferdinand +heated +es +transported +##shan +safely +nonetheless +##orn +##gal +motors +demanding +##sburg +startled +##brook +ally +generate +caps +ghana +stained +demo +mentions +beds +ap +afterward +diary +##bling +utility +##iro +richards +1837 +conspiracy +conscious +shining +footsteps +observer +cyprus +urged +loyalty +developer +probability 
+olive +upgraded +gym +miracle +insects +graves +1844 +ourselves +hydrogen +amazon +katie +tickets +poets +##pm +planes +##pan +prevention +witnessed +dense +jin +randy +tang +warehouse +monroe +bang +archived +elderly +investigations +alec +granite +mineral +conflicts +controlling +aboriginal +carlo +##zu +mechanics +stan +stark +rhode +skirt +est +##berry +bombs +respected +##horn +imposed +limestone +deny +nominee +memphis +grabbing +disabled +##als +amusement +aa +frankfurt +corn +referendum +varies +slowed +disk +firms +unconscious +incredible +clue +sue +##zhou +twist +##cio +joins +idaho +chad +developers +computing +destroyer +103 +mortal +tucker +kingston +choices +yu +carson +1800 +os +whitney +geneva +pretend +dimension +staged +plateau +maya +##une +freestyle +##bc +rovers +hiv +##ids +tristan +classroom +prospect +##hus +honestly +diploma +lied +thermal +auxiliary +feast +unlikely +iata +##tel +morocco +pounding +treasury +lithuania +considerably +1841 +dish +1812 +geological +matching +stumbled +destroying +marched +brien +advances +cake +nicole +belle +settling +measuring +directing +##mie +tuesday +bassist +capabilities +stunned +fraud +torpedo +##list +##phone +anton +wisdom +surveillance +ruined +##ulate +lawsuit +healthcare +theorem +halls +trend +aka +horizontal +dozens +acquire +lasting +swim +hawk +gorgeous +fees +vicinity +decrease +adoption +tactics +##ography +pakistani +##ole +draws +##hall +willie +burke +heath +algorithm +integral +powder +elliott +brigadier +jackie +tate +varieties +darker +##cho +lately +cigarette +specimens +adds +##ree +##ensis +##inger +exploded +finalist +cia +murders +wilderness +arguments +nicknamed +acceptance +onwards +manufacture +robertson +jets +tampa +enterprises +blog +loudly +composers +nominations +1838 +ai +malta +inquiry +automobile +hosting +viii +rays +tilted +grief +museums +strategies +furious +euro +equality +cohen +poison +surrey +wireless +governed +ridiculous +moses +##esh +##room +vanished +##ito +barnes +attract +morrison +istanbul +##iness +absent +rotation +petition +janet +##logical +satisfaction +custody +deliberately +observatory +comedian +surfaces +pinyin +novelist +strictly +canterbury +oslo +monks +embrace +ibm +jealous +photograph +continent +dorothy +marina +doc +excess +holden +allegations +explaining +stack +avoiding +lance +storyline +majesty +poorly +spike +dos +bradford +raven +travis +classics +proven +voltage +pillow +fists +butt +1842 +interpreted +##car +1839 +gage +telegraph +lens +promising +expelled +casual +collector +zones +##min +silly +nintendo +##kh +##bra +downstairs +chef +suspicious +afl +flies +vacant +uganda +pregnancy +condemned +lutheran +estimates +cheap +decree +saxon +proximity +stripped +idiot +deposits +contrary +presenter +magnus +glacier +im +offense +edwin +##ori +upright +##long +bolt +##ois +toss +geographical +##izes +environments +delicate +marking +abstract +xavier +nails +windsor +plantation +occurring +equity +saskatchewan +fears +drifted +sequences +vegetation +revolt +##stic +1843 +sooner +fusion +opposing +nato +skating +1836 +secretly +ruin +lease +##oc +edit +##nne +flora +anxiety +ruby +##ological +##mia +tel +bout +taxi +emmy +frost +rainbow +compounds +foundations +rainfall +assassination +nightmare +dominican +##win +achievements +deserve +orlando +intact +armenia +##nte +calgary +valentine +106 +marion +proclaimed +theodore +bells +courtyard +thigh +gonzalez +console +troop +minimal +monte +everyday +##ence +##if +supporter +terrorism +buck +openly 
+presbyterian +activists +carpet +##iers +rubbing +uprising +##yi +cute +conceived +legally +##cht +millennium +cello +velocity +ji +rescued +cardiff +1835 +rex +concentrate +senators +beard +rendered +glowing +battalions +scouts +competitors +sculptor +catalogue +arctic +ion +raja +bicycle +wow +glancing +lawn +##woman +gentleman +lighthouse +publish +predicted +calculated +##val +variants +##gne +strain +##ui +winston +deceased +##nus +touchdowns +brady +caleb +sinking +echoed +crush +hon +blessed +protagonist +hayes +endangered +magnitude +editors +##tine +estimate +responsibilities +##mel +backup +laying +consumed +sealed +zurich +lovers +frustrated +##eau +ahmed +kicking +mit +treasurer +1832 +biblical +refuse +terrified +pump +agrees +genuine +imprisonment +refuses +plymouth +##hen +lou +##nen +tara +trembling +antarctic +ton +learns +##tas +crap +crucial +faction +atop +##borough +wrap +lancaster +odds +hopkins +erik +lyon +##eon +bros +##ode +snap +locality +tips +empress +crowned +cal +acclaimed +chuckled +##ory +clara +sends +mild +towel +##fl +##day +##а +wishing +assuming +interviewed +##bal +##die +interactions +eden +cups +helena +##lf +indie +beck +##fire +batteries +filipino +wizard +parted +##lam +traces +##born +rows +idol +albany +delegates +##ees +##sar +discussions +##ex +notre +instructed +belgrade +highways +suggestion +lauren +possess +orientation +alexandria +abdul +beats +salary +reunion +ludwig +alright +wagner +intimate +pockets +slovenia +hugged +brighton +merchants +cruel +stole +trek +slopes +repairs +enrollment +politically +underlying +promotional +counting +boeing +##bb +isabella +naming +##и +keen +bacteria +listing +separately +belfast +ussr +450 +lithuanian +anybody +ribs +sphere +martinez +cock +embarrassed +proposals +fragments +nationals +##fs +##wski +premises +fin +1500 +alpine +matched +freely +bounded +jace +sleeve +##af +gaming +pier +populated +evident +##like +frances +flooded +##dle +frightened +pour +trainer +framed +visitor +challenging +pig +wickets +##fold +infected +email +##pes +arose +##aw +reward +ecuador +oblast +vale +ch +shuttle +##usa +bach +rankings +forbidden +cornwall +accordance +salem +consumers +bruno +fantastic +toes +machinery +resolved +julius +remembering +propaganda +iceland +bombardment +tide +contacts +wives +##rah +concerto +macdonald +albania +implement +daisy +tapped +sudan +helmet +angela +mistress +##lic +crop +sunk +finest +##craft +hostile +##ute +##tsu +boxer +fr +paths +adjusted +habit +ballot +supervision +soprano +##zen +bullets +wicked +sunset +regiments +disappear +lamp +performs +app +##gia +##oa +rabbit +digging +incidents +entries +##cion +dishes +##oi +introducing +##ati +##fied +freshman +slot +jill +tackles +baroque +backs +##iest +lone +sponsor +destiny +altogether +convert +##aro +consensus +shapes +demonstration +basically +feminist +auction +artifacts +##bing +strongest +twitter +halifax +2019 +allmusic +mighty +smallest +precise +alexandra +viola +##los +##ille +manuscripts +##illo +dancers +ari +managers +monuments +blades +barracks +springfield +maiden +consolidated +electron +##end +berry +airing +wheat +nobel +inclusion +blair +payments +geography +bee +cc +eleanor +react +##hurst +afc +manitoba +##yu +su +lineup +fitness +recreational +investments +airborne +disappointment +##dis +edmonton +viewing +##row +renovation +##cast +infant +bankruptcy +roses +aftermath +pavilion +##yer +carpenter +withdrawal +ladder +##hy +discussing +popped +reliable +agreements +rochester +##abad +curves 
+bombers +220 +rao +reverend +decreased +choosing +107 +stiff +consulting +naples +crawford +tracy +ka +ribbon +cops +##lee +crushed +deciding +unified +teenager +accepting +flagship +explorer +poles +sanchez +inspection +revived +skilled +induced +exchanged +flee +locals +tragedy +swallow +loading +hanna +demonstrate +##ela +salvador +flown +contestants +civilization +##ines +wanna +rhodes +fletcher +hector +knocking +considers +##ough +nash +mechanisms +sensed +mentally +walt +unclear +##eus +renovated +madame +##cks +crews +governmental +##hin +undertaken +monkey +##ben +##ato +fatal +armored +copa +caves +governance +grasp +perception +certification +froze +damp +tugged +wyoming +##rg +##ero +newman +##lor +nerves +curiosity +graph +115 +##ami +withdraw +tunnels +dull +meredith +moss +exhibits +neighbors +communicate +accuracy +explored +raiders +republicans +secular +kat +superman +penny +criticised +##tch +freed +update +conviction +wade +ham +likewise +delegation +gotta +doll +promises +technological +myth +nationality +resolve +convent +##mark +sharon +dig +sip +coordinator +entrepreneur +fold +##dine +capability +councillor +synonym +blown +swan +cursed +1815 +jonas +haired +sofa +canvas +keeper +rivalry +##hart +rapper +speedway +swords +postal +maxwell +estonia +potter +recurring +##nn +##ave +errors +##oni +cognitive +1834 +##² +claws +nadu +roberto +bce +wrestler +ellie +##ations +infinite +ink +##tia +presumably +finite +staircase +108 +noel +patricia +nacional +##cation +chill +eternal +tu +preventing +prussia +fossil +limbs +##logist +ernst +frog +perez +rene +##ace +pizza +prussian +##ios +##vy +molecules +regulatory +answering +opinions +sworn +lengths +supposedly +hypothesis +upward +habitats +seating +ancestors +drank +yield +hd +synthesis +researcher +modest +##var +mothers +peered +voluntary +homeland +##the +acclaim +##igan +static +valve +luxembourg +alto +carroll +fe +receptor +norton +ambulance +##tian +johnston +catholics +depicting +jointly +elephant +gloria +mentor +badge +ahmad +distinguish +remarked +councils +precisely +allison +advancing +detection +crowded +##10 +cooperative +ankle +mercedes +dagger +surrendered +pollution +commit +subway +jeffrey +lesson +sculptures +provider +##fication +membrane +timothy +rectangular +fiscal +heating +teammate +basket +particle +anonymous +deployment +##ple +missiles +courthouse +proportion +shoe +sec +##ller +complaints +forbes +blacks +abandon +remind +sizes +overwhelming +autobiography +natalie +##awa +risks +contestant +countryside +babies +scorer +invaded +enclosed +proceed +hurling +disorders +##cu +reflecting +continuously +cruiser +graduates +freeway +investigated +ore +deserved +maid +blocking +phillip +jorge +shakes +dove +mann +variables +lacked +burden +accompanying +que +consistently +organizing +provisional +complained +endless +##rm +tubes +juice +georges +krishna +mick +labels +thriller +##uch +laps +arcade +sage +snail +##table +shannon +fi +laurence +seoul +vacation +presenting +hire +churchill +surprisingly +prohibited +savannah +technically +##oli +170 +##lessly +testimony +suited +speeds +toys +romans +mlb +flowering +measurement +talented +kay +settings +charleston +expectations +shattered +achieving +triumph +ceremonies +portsmouth +lanes +mandatory +loser +stretching +cologne +realizes +seventy +cornell +careers +webb +##ulating +americas +budapest +ava +suspicion +##ison +yo +conrad +##hai +sterling +jessie +rector +##az +1831 +transform +organize +loans +christine +volcanic +warrant +slender 
+summers +subfamily +newer +danced +dynamics +rhine +proceeds +heinrich +gastropod +commands +sings +facilitate +easter +ra +positioned +responses +expense +fruits +yanked +imported +25th +velvet +vic +primitive +tribune +baldwin +neighbourhood +donna +rip +hay +pr +##uro +1814 +espn +welcomed +##aria +qualifier +glare +highland +timing +##cted +shells +eased +geometry +louder +exciting +slovakia +##sion +##iz +##lot +savings +prairie +##ques +marching +rafael +tonnes +##lled +curtain +preceding +shy +heal +greene +worthy +##pot +detachment +bury +sherman +##eck +reinforced +seeks +bottles +contracted +duchess +outfit +walsh +##sc +mickey +##ase +geoffrey +archer +squeeze +dawson +eliminate +invention +##enberg +neal +##eth +stance +dealer +coral +maple +retire +polo +simplified +##ht +1833 +hid +watts +backwards +jules +##oke +genesis +mt +frames +rebounds +burma +woodland +moist +santos +whispers +drained +subspecies +##aa +streaming +ulster +burnt +correspondence +maternal +gerard +denis +stealing +##load +genius +duchy +##oria +inaugurated +momentum +suits +placement +sovereign +clause +thames +##hara +confederation +reservation +sketch +yankees +lets +rotten +charm +hal +verses +ultra +commercially +dot +salon +citation +adopt +winnipeg +mist +allocated +cairo +##boy +jenkins +interference +objectives +##wind +1820 +portfolio +armoured +sectors +##eh +initiatives +##world +integrity +exercises +robe +tap +ab +gazed +##tones +distracted +rulers +111 +favorable +jerome +tended +cart +factories +##eri +diplomat +valued +gravel +charitable +##try +calvin +exploring +chang +shepherd +terrace +pdf +pupil +##ural +reflects +ups +##rch +governors +shelf +depths +##nberg +trailed +crest +tackle +##nian +##ats +hatred +##kai +clare +makers +ethiopia +longtime +detected +embedded +lacking +slapped +rely +thomson +anticipation +iso +morton +successive +agnes +screenwriter +straightened +philippe +playwright +haunted +licence +iris +intentions +sutton +112 +logical +correctly +##weight +branded +licked +tipped +silva +ricky +narrator +requests +##ents +greeted +supernatural +cow +##wald +lung +refusing +employer +strait +gaelic +liner +##piece +zoe +sabha +##mba +driveway +harvest +prints +bates +reluctantly +threshold +algebra +ira +wherever +coupled +240 +assumption +picks +##air +designers +raids +gentlemen +##ean +roller +blowing +leipzig +locks +screw +dressing +strand +##lings +scar +dwarf +depicts +##nu +nods +##mine +differ +boris +##eur +yuan +flip +##gie +mob +invested +questioning +applying +##ture +shout +##sel +gameplay +blamed +illustrations +bothered +weakness +rehabilitation +##of +##zes +envelope +rumors +miners +leicester +subtle +kerry +##ico +ferguson +##fu +premiership +ne +##cat +bengali +prof +catches +remnants +dana +##rily +shouting +presidents +baltic +ought +ghosts +dances +sailors +shirley +fancy +dominic +##bie +madonna +##rick +bark +buttons +gymnasium +ashes +liver +toby +oath +providence +doyle +evangelical +nixon +cement +carnegie +embarked +hatch +surroundings +guarantee +needing +pirate +essence +##bee +filter +crane +hammond +projected +immune +percy +twelfth +##ult +regent +doctoral +damon +mikhail +##ichi +lu +critically +elect +realised +abortion +acute +screening +mythology +steadily +##fc +frown +nottingham +kirk +wa +minneapolis +##rra +module +algeria +mc +nautical +encounters +surprising +statues +availability +shirts +pie +alma +brows +munster +mack +soup +crater +tornado +sanskrit +cedar +explosive +bordered +dixon +planets +stamp +exam +happily 
+##bble +carriers +kidnapped +##vis +accommodation +emigrated +##met +knockout +correspondent +violation +profits +peaks +lang +specimen +agenda +ancestry +pottery +spelling +equations +obtaining +ki +linking +1825 +debris +asylum +##20 +buddhism +teddy +##ants +gazette +##nger +##sse +dental +eligibility +utc +fathers +averaged +zimbabwe +francesco +coloured +hissed +translator +lynch +mandate +humanities +mackenzie +uniforms +lin +##iana +##gio +asset +mhz +fitting +samantha +genera +wei +rim +beloved +shark +riot +entities +expressions +indo +carmen +slipping +owing +abbot +neighbor +sidney +##av +rats +recommendations +encouraging +squadrons +anticipated +commanders +conquered +##oto +donations +diagnosed +##mond +divide +##iva +guessed +decoration +vernon +auditorium +revelation +conversations +##kers +##power +herzegovina +dash +alike +protested +lateral +herman +accredited +mg +##gent +freeman +mel +fiji +crow +crimson +##rine +livestock +##pped +humanitarian +bored +oz +whip +##lene +##ali +legitimate +alter +grinning +spelled +anxious +oriental +wesley +##nin +##hole +carnival +controller +detect +##ssa +bowed +educator +kosovo +macedonia +##sin +occupy +mastering +stephanie +janeiro +para +unaware +nurses +noon +135 +cam +hopefully +ranger +combine +sociology +polar +rica +##eer +neill +##sman +holocaust +##ip +doubled +lust +1828 +109 +decent +cooling +unveiled +##card +1829 +nsw +homer +chapman +meyer +##gin +dive +mae +reagan +expertise +##gled +darwin +brooke +sided +prosecution +investigating +comprised +petroleum +genres +reluctant +differently +trilogy +johns +vegetables +corpse +highlighted +lounge +pension +unsuccessfully +elegant +aided +ivory +beatles +amelia +cain +dubai +sunny +immigrant +babe +click +##nder +underwater +pepper +combining +mumbled +atlas +horns +accessed +ballad +physicians +homeless +gestured +rpm +freak +louisville +corporations +patriots +prizes +rational +warn +modes +decorative +overnight +din +troubled +phantom +##ort +monarch +sheer +##dorf +generals +guidelines +organs +addresses +##zon +enhance +curling +parishes +cord +##kie +linux +caesar +deutsche +bavaria +##bia +coleman +cyclone +##eria +bacon +petty +##yama +##old +hampton +diagnosis +1824 +throws +complexity +rita +disputed +##₃ +pablo +##sch +marketed +trafficking +##ulus +examine +plague +formats +##oh +vault +faithful +##bourne +webster +##ox +highlights +##ient +##ann +phones +vacuum +sandwich +modeling +##gated +bolivia +clergy +qualities +isabel +##nas +##ars +wears +screams +reunited +annoyed +bra +##ancy +##rate +differential +transmitter +tattoo +container +poker +##och +excessive +resides +cowboys +##tum +augustus +trash +providers +statute +retreated +balcony +reversed +void +storey +preceded +masses +leap +laughs +neighborhoods +wards +schemes +falcon +santo +battlefield +pad +ronnie +thread +lesbian +venus +##dian +beg +sandstone +daylight +punched +gwen +analog +stroked +wwe +acceptable +measurements +dec +toxic +##kel +adequate +surgical +economist +parameters +varsity +##sberg +quantity +ella +##chy +##rton +countess +generating +precision +diamonds +expressway +ga +##ı +1821 +uruguay +talents +galleries +expenses +scanned +colleague +outlets +ryder +lucien +##ila +paramount +##bon +syracuse +dim +fangs +gown +sweep +##sie +toyota +missionaries +websites +##nsis +sentences +adviser +val +trademark +spells +##plane +patience +starter +slim +##borg +toe +incredibly +shoots +elliot +nobility +##wyn +cowboy +endorsed +gardner +tendency +persuaded +organisms +emissions 
+kazakhstan +amused +boring +chips +themed +##hand +llc +constantinople +chasing +systematic +guatemala +borrowed +erin +carey +##hard +highlands +struggles +1810 +##ifying +##ced +wong +exceptions +develops +enlarged +kindergarten +castro +##ern +##rina +leigh +zombie +juvenile +##most +consul +##nar +sailor +hyde +clarence +intensive +pinned +nasty +useless +jung +clayton +stuffed +exceptional +ix +apostolic +230 +transactions +##dge +exempt +swinging +cove +religions +##ash +shields +dairy +bypass +190 +pursuing +bug +joyce +bombay +chassis +southampton +chat +interact +redesignated +##pen +nascar +pray +salmon +rigid +regained +malaysian +grim +publicity +constituted +capturing +toilet +delegate +purely +tray +drift +loosely +striker +weakened +trinidad +mitch +itv +defines +transmitted +ming +scarlet +nodding +fitzgerald +fu +narrowly +sp +tooth +standings +virtue +##₁ +##wara +##cting +chateau +gloves +lid +##nel +hurting +conservatory +##pel +sinclair +reopened +sympathy +nigerian +strode +advocated +optional +chronic +discharge +##rc +suck +compatible +laurel +stella +shi +fails +wage +dodge +128 +informal +sorts +levi +buddha +villagers +##aka +chronicles +heavier +summoned +gateway +3000 +eleventh +jewelry +translations +accordingly +seas +##ency +fiber +pyramid +cubic +dragging +##ista +caring +##ops +android +contacted +lunar +##dt +kai +lisbon +patted +1826 +sacramento +theft +madagascar +subtropical +disputes +ta +holidays +piper +willow +mare +cane +itunes +newfoundland +benny +companions +dong +raj +observe +roar +charming +plaque +tibetan +fossils +enacted +manning +bubble +tina +tanzania +##eda +##hir +funk +swamp +deputies +cloak +ufc +scenario +par +scratch +metals +anthem +guru +engaging +specially +##boat +dialects +nineteen +cecil +duet +disability +messenger +unofficial +##lies +defunct +eds +moonlight +drainage +surname +puzzle +honda +switching +conservatives +mammals +knox +broadcaster +sidewalk +cope +##ried +benson +princes +peterson +##sal +bedford +sharks +eli +wreck +alberto +gasp +archaeology +lgbt +teaches +securities +madness +compromise +waving +coordination +davidson +visions +leased +possibilities +eighty +jun +fernandez +enthusiasm +assassin +sponsorship +reviewer +kingdoms +estonian +laboratories +##fy +##nal +applies +verb +celebrations +##zzo +rowing +lightweight +sadness +submit +mvp +balanced +dude +##vas +explicitly +metric +magnificent +mound +brett +mohammad +mistakes +irregular +##hing +##ass +sanders +betrayed +shipped +surge +##enburg +reporters +termed +georg +pity +verbal +bulls +abbreviated +enabling +appealed +##are +##atic +sicily +sting +heel +sweetheart +bart +spacecraft +brutal +monarchy +##tter +aberdeen +cameo +diane +##ub +survivor +clyde +##aries +complaint +##makers +clarinet +delicious +chilean +karnataka +coordinates +1818 +panties +##rst +pretending +ar +dramatically +kiev +bella +tends +distances +113 +catalog +launching +instances +telecommunications +portable +lindsay +vatican +##eim +angles +aliens +marker +stint +screens +bolton +##rne +judy +wool +benedict +plasma +europa +spark +imaging +filmmaker +swiftly +##een +contributor +##nor +opted +stamps +apologize +financing +butter +gideon +sophisticated +alignment +avery +chemicals +yearly +speculation +prominence +professionally +##ils +immortal +institutional +inception +wrists +identifying +tribunal +derives +gains +##wo +papal +preference +linguistic +vince +operative +brewery +##ont +unemployment +boyd +##ured +##outs +albeit +prophet +1813 +bi +##rr +##face +##rad 
+quarterly +asteroid +cleaned +radius +temper +##llen +telugu +jerk +viscount +menu +##ote +glimpse +##aya +yacht +hawaiian +baden +##rl +laptop +readily +##gu +monetary +offshore +scots +watches +##yang +##arian +upgrade +needle +xbox +lea +encyclopedia +flank +fingertips +##pus +delight +teachings +confirm +roth +beaches +midway +winters +##iah +teasing +daytime +beverly +gambling +bonnie +##backs +regulated +clement +hermann +tricks +knot +##shing +##uring +##vre +detached +ecological +owed +specialty +byron +inventor +bats +stays +screened +unesco +midland +trim +affection +##ander +##rry +jess +thoroughly +feedback +##uma +chennai +strained +heartbeat +wrapping +overtime +pleaded +##sworth +mon +leisure +oclc +##tate +##ele +feathers +angelo +thirds +nuts +surveys +clever +gill +commentator +##dos +darren +rides +gibraltar +##nc +##mu +dissolution +dedication +shin +meals +saddle +elvis +reds +chaired +taller +appreciation +functioning +niece +favored +advocacy +robbie +criminals +suffolk +yugoslav +passport +constable +congressman +hastings +vera +##rov +consecrated +sparks +ecclesiastical +confined +##ovich +muller +floyd +nora +1822 +paved +1827 +cumberland +ned +saga +spiral +##flow +appreciated +yi +collaborative +treating +similarities +feminine +finishes +##ib +jade +import +##nse +##hot +champagne +mice +securing +celebrities +helsinki +attributes +##gos +cousins +phases +ache +lucia +gandhi +submission +vicar +spear +shine +tasmania +biting +detention +constitute +tighter +seasonal +##gus +terrestrial +matthews +##oka +effectiveness +parody +philharmonic +##onic +1816 +strangers +encoded +consortium +guaranteed +regards +shifts +tortured +collision +supervisor +inform +broader +insight +theaters +armour +emeritus +blink +incorporates +mapping +##50 +##ein +handball +flexible +##nta +substantially +generous +thief +##own +carr +loses +1793 +prose +ucla +romeo +generic +metallic +realization +damages +mk +commissioners +zach +default +##ther +helicopters +lengthy +stems +spa +partnered +spectators +rogue +indication +penalties +teresa +1801 +sen +##tric +dalton +##wich +irving +photographic +##vey +dell +deaf +peters +excluded +unsure +##vable +patterson +crawled +##zio +resided +whipped +latvia +slower +ecole +pipes +employers +maharashtra +comparable +va +textile +pageant +##gel +alphabet +binary +irrigation +chartered +choked +antoine +offs +waking +supplement +##wen +quantities +demolition +regain +locate +urdu +folks +alt +114 +##mc +scary +andreas +whites +##ava +classrooms +mw +aesthetic +publishes +valleys +guides +cubs +johannes +bryant +conventions +affecting +##itt +drain +awesome +isolation +prosecutor +ambitious +apology +captive +downs +atmospheric +lorenzo +aisle +beef +foul +##onia +kidding +composite +disturbed +illusion +natives +##ffer +emi +rockets +riverside +wartime +painters +adolf +melted +##ail +uncertainty +simulation +hawks +progressed +meantime +builder +spray +breach +unhappy +regina +russians +##urg +determining +##tation +tram +1806 +##quin +aging +##12 +1823 +garion +rented +mister +diaz +terminated +clip +1817 +depend +nervously +disco +owe +defenders +shiva +notorious +disbelief +shiny +worcester +##gation +##yr +trailing +undertook +islander +belarus +limitations +watershed +fuller +overlooking +utilized +raphael +1819 +synthetic +breakdown +klein +##nate +moaned +memoir +lamb +practicing +##erly +cellular +arrows +exotic +##graphy +witches +117 +charted +rey +hut +hierarchy +subdivision +freshwater +giuseppe +aloud +reyes +qatar +marty 
+sideways +utterly +sexually +jude +prayers +mccarthy +softball +blend +damien +##gging +##metric +wholly +erupted +lebanese +negro +revenues +tasted +comparative +teamed +transaction +labeled +maori +sovereignty +parkway +trauma +gran +malay +121 +advancement +descendant +2020 +buzz +salvation +inventory +symbolic +##making +antarctica +mps +##gas +##bro +mohammed +myanmar +holt +submarines +tones +##lman +locker +patriarch +bangkok +emerson +remarks +predators +kin +afghan +confession +norwich +rental +emerge +advantages +##zel +rca +##hold +shortened +storms +aidan +##matic +autonomy +compliance +##quet +dudley +atp +##osis +1803 +motto +documentation +summary +professors +spectacular +christina +archdiocese +flashing +innocence +remake +##dell +psychic +reef +scare +employ +rs +sticks +meg +gus +leans +##ude +accompany +bergen +tomas +##iko +doom +wages +pools +##nch +##bes +breasts +scholarly +alison +outline +brittany +breakthrough +willis +realistic +##cut +##boro +competitor +##stan +pike +picnic +icon +designing +commercials +washing +villain +skiing +micro +costumes +auburn +halted +executives +##hat +logistics +cycles +vowel +applicable +barrett +exclaimed +eurovision +eternity +ramon +##umi +##lls +modifications +sweeping +disgust +##uck +torch +aviv +ensuring +rude +dusty +sonic +donovan +outskirts +cu +pathway +##band +##gun +##lines +disciplines +acids +cadet +paired +##40 +sketches +##sive +marriages +##⁺ +folding +peers +slovak +implies +admired +##beck +1880s +leopold +instinct +attained +weston +megan +horace +##ination +dorsal +ingredients +evolutionary +##its +complications +deity +lethal +brushing +levy +deserted +institutes +posthumously +delivering +telescope +coronation +motivated +rapids +luc +flicked +pays +volcano +tanner +weighed +##nica +crowds +frankie +gifted +addressing +granddaughter +winding +##rna +constantine +gomez +##front +landscapes +rudolf +anthropology +slate +werewolf +##lio +astronomy +circa +rouge +dreaming +sack +knelt +drowned +naomi +prolific +tracked +freezing +herb +##dium +agony +randall +twisting +wendy +deposit +touches +vein +wheeler +##bbled +##bor +batted +retaining +tire +presently +compare +specification +daemon +nigel +##grave +merry +recommendation +czechoslovakia +sandra +ng +roma +##sts +lambert +inheritance +sheikh +winchester +cries +examining +##yle +comeback +cuisine +nave +##iv +ko +retrieve +tomatoes +barker +polished +defining +irene +lantern +personalities +begging +tract +swore +1809 +175 +##gic +omaha +brotherhood +##rley +haiti +##ots +exeter +##ete +##zia +steele +dumb +pearson +210 +surveyed +elisabeth +trends +##ef +fritz +##rf +premium +bugs +fraction +calmly +viking +##birds +tug +inserted +unusually +##ield +confronted +distress +crashing +brent +turks +resign +##olo +cambodia +gabe +sauce +##kal +evelyn +116 +extant +clusters +quarry +teenagers +luna +##lers +##ister +affiliation +drill +##ashi +panthers +scenic +libya +anita +strengthen +inscriptions +##cated +lace +sued +judith +riots +##uted +mint +##eta +preparations +midst +dub +challenger +##vich +mock +cf +displaced +wicket +breaths +enables +schmidt +analyst +##lum +ag +highlight +automotive +axe +josef +newark +sufficiently +resembles +50th +##pal +flushed +mum +traits +##ante +commodore +incomplete +warming +titular +ceremonial +ethical +118 +celebrating +eighteenth +cao +lima +medalist +mobility +strips +snakes +##city +miniature +zagreb +barton +escapes +umbrella +automated +doubted +differs +cooled +georgetown +dresden +cooked +fade +wyatt +rna 
+jacobs +carlton +abundant +stereo +boost +madras +inning +##hia +spur +ip +malayalam +begged +osaka +groan +escaping +charging +dose +vista +##aj +bud +papa +communists +advocates +edged +tri +##cent +resemble +peaking +necklace +fried +montenegro +saxony +goose +glances +stuttgart +curator +recruit +grocery +sympathetic +##tting +##fort +127 +lotus +randolph +ancestor +##rand +succeeding +jupiter +1798 +macedonian +##heads +hiking +1808 +handing +fischer +##itive +garbage +node +##pies +prone +singular +papua +inclined +attractions +italia +pouring +motioned +grandma +garnered +jacksonville +corp +ego +ringing +aluminum +##hausen +ordering +##foot +drawer +traders +synagogue +##play +##kawa +resistant +wandering +fragile +fiona +teased +var +hardcore +soaked +jubilee +decisive +exposition +mercer +poster +valencia +hale +kuwait +1811 +##ises +##wr +##eed +tavern +gamma +122 +johan +##uer +airways +amino +gil +##ury +vocational +domains +torres +##sp +generator +folklore +outcomes +##keeper +canberra +shooter +fl +beams +confrontation +##lling +##gram +feb +aligned +forestry +pipeline +jax +motorway +conception +decay +##tos +coffin +##cott +stalin +1805 +escorted +minded +##nam +sitcom +purchasing +twilight +veronica +additions +passive +tensions +straw +123 +frequencies +1804 +refugee +cultivation +##iate +christie +clary +bulletin +crept +disposal +##rich +##zong +processor +crescent +##rol +bmw +emphasized +whale +nazis +aurora +##eng +dwelling +hauled +sponsors +toledo +mega +ideology +theatres +tessa +cerambycidae +saves +turtle +cone +suspects +kara +rusty +yelling +greeks +mozart +shades +cocked +participant +##tro +shire +spit +freeze +necessity +##cos +inmates +nielsen +councillors +loaned +uncommon +omar +peasants +botanical +offspring +daniels +formations +jokes +1794 +pioneers +sigma +licensing +##sus +wheelchair +polite +1807 +liquor +pratt +trustee +##uta +forewings +balloon +##zz +kilometre +camping +explicit +casually +shawn +foolish +teammates +nm +hassan +carrie +judged +satisfy +vanessa +knives +selective +cnn +flowed +##lice +eclipse +stressed +eliza +mathematician +cease +cultivated +##roy +commissions +browns +##ania +destroyers +sheridan +meadow +##rius +minerals +##cial +downstream +clash +gram +memoirs +ventures +baha +seymour +archie +midlands +edith +fare +flynn +invite +canceled +tiles +stabbed +boulder +incorporate +amended +camden +facial +mollusk +unreleased +descriptions +yoga +grabs +550 +raises +ramp +shiver +##rose +coined +pioneering +tunes +qing +warwick +tops +119 +melanie +giles +##rous +wandered +##inal +annexed +nov +30th +unnamed +##ished +organizational +airplane +normandy +stoke +whistle +blessing +violations +chased +holders +shotgun +##ctic +outlet +reactor +##vik +tires +tearing +shores +fortified +mascot +constituencies +nc +columnist +productive +tibet +##rta +lineage +hooked +oct +tapes +judging +cody +##gger +hansen +kashmir +triggered +##eva +solved +cliffs +##tree +resisted +anatomy +protesters +transparent +implied +##iga +injection +mattress +excluding +##mbo +defenses +helpless +devotion +##elli +growl +liberals +weber +phenomena +atoms +plug +##iff +mortality +apprentice +howe +convincing +aaa +swimmer +barber +leone +promptly +sodium +def +nowadays +arise +##oning +gloucester +corrected +dignity +norm +erie +##ders +elders +evacuated +sylvia +compression +##yar +hartford +pose +backpack +reasoning +accepts +24th +wipe +millimetres +marcel +##oda +dodgers +albion +1790 +overwhelmed +aerospace +oaks +1795 +showcase +acknowledge 
+recovering +nolan +ashe +hurts +geology +fashioned +disappearance +farewell +swollen +shrug +marquis +wimbledon +124 +rue +1792 +commemorate +reduces +experiencing +inevitable +calcutta +intel +##court +murderer +sticking +fisheries +imagery +bloom +280 +brake +##inus +gustav +hesitation +memorable +po +viral +beans +accidents +tunisia +antenna +spilled +consort +treatments +aye +perimeter +##gard +donation +hostage +migrated +banker +addiction +apex +lil +trout +##ously +conscience +##nova +rams +sands +genome +passionate +troubles +##lets +##set +amid +##ibility +##ret +higgins +exceed +vikings +##vie +payne +##zan +muscular +##ste +defendant +sucking +##wal +ibrahim +fuselage +claudia +vfl +europeans +snails +interval +##garh +preparatory +statewide +tasked +lacrosse +viktor +##lation +angola +##hra +flint +implications +employs +teens +patrons +stall +weekends +barriers +scrambled +nucleus +tehran +jenna +parsons +lifelong +robots +displacement +5000 +##bles +precipitation +##gt +knuckles +clutched +1802 +marrying +ecology +marx +accusations +declare +scars +kolkata +mat +meadows +bermuda +skeleton +finalists +vintage +crawl +coordinate +affects +subjected +orchestral +mistaken +##tc +mirrors +dipped +relied +260 +arches +candle +##nick +incorporating +wildly +fond +basilica +owl +fringe +rituals +whispering +stirred +feud +tertiary +slick +goat +honorable +whereby +skip +ricardo +stripes +parachute +adjoining +submerged +synthesizer +##gren +intend +positively +ninety +phi +beaver +partition +fellows +alexis +prohibition +carlisle +bizarre +fraternity +##bre +doubts +icy +cbc +aquatic +sneak +sonny +combines +airports +crude +supervised +spatial +merge +alfonso +##bic +corrupt +scan +undergo +##ams +disabilities +colombian +comparing +dolphins +perkins +##lish +reprinted +unanimous +bounced +hairs +underworld +midwest +semester +bucket +paperback +miniseries +coventry +demise +##leigh +demonstrations +sensor +rotating +yan +##hler +arrange +soils +##idge +hyderabad +labs +##dr +brakes +grandchildren +##nde +negotiated +rover +ferrari +continuation +directorate +augusta +stevenson +counterpart +gore +##rda +nursery +rican +ave +collectively +broadly +pastoral +repertoire +asserted +discovering +nordic +styled +fiba +cunningham +harley +middlesex +survives +tumor +tempo +zack +aiming +lok +urgent +##rade +##nto +devils +##ement +contractor +turin +##wl +##ool +bliss +repaired +simmons +moan +astronomical +cr +negotiate +lyric +1890s +lara +bred +clad +angus +pbs +##ience +engineered +posed +##lk +hernandez +possessions +elbows +psychiatric +strokes +confluence +electorate +lifts +campuses +lava +alps +##ep +##ution +##date +physicist +woody +##page +##ographic +##itis +juliet +reformation +sparhawk +320 +complement +suppressed +jewel +##½ +floated +##kas +continuity +sadly +##ische +inability +melting +scanning +paula +flour +judaism +safer +vague +##lm +solving +curb +##stown +financially +gable +bees +expired +miserable +cassidy +dominion +1789 +cupped +145 +robbery +facto +amos +warden +resume +tallest +marvin +ing +pounded +usd +declaring +gasoline +##aux +darkened +270 +650 +sophomore +##mere +erection +gossip +televised +risen +dial +##eu +pillars +##link +passages +profound +##tina +arabian +ashton +silicon +nail +##ead +##lated +##wer +##hardt +fleming +firearms +ducked +circuits +blows +waterloo +titans +##lina +atom +fireplace +cheshire +financed +activation +algorithms +##zzi +constituent +catcher +cherokee +partnerships +sexuality +platoon +tragic +vivian +guarded +whiskey 
+meditation
+poetic
+##late
+##nga
+##ake
+porto
+listeners
+dominance
+kendra
+mona
[... several thousand further vocabulary entries in this hunk, one `+token` per line; subword continuations carry the `##` prefix (e.g. `##lal`, `##ght`, `##uda`); entries include numerals, years, and non-ASCII tokens such as `m³`, `¹⁄₂`, `co₂` ...]
+administer
+johanna
+##imate +gel +suspiciously +1767 +sobs +##dington +backbone +hayward +garry +##folding +##nesia +maxi +##oof +##ppe +ellison +galileo +##stand +crimea +frenzy +amour +bumper +matrices +natalia +baking +garth +palestinians +##grove +smack +conveyed +ensembles +gardening +##manship +##rup +##stituting +1640 +harvesting +topography +jing +shifters +dormitory +##carriage +##lston +ist +skulls +##stadt +dolores +jewellery +sarawak +##wai +##zier +fences +christy +confinement +tumbling +credibility +fir +stench +##bria +##plication +##nged +##sam +virtues +##belt +marjorie +pba +##eem +##made +celebrates +schooner +agitated +barley +fulfilling +anthropologist +##pro +restrict +novi +regulating +##nent +padres +##rani +##hesive +loyola +tabitha +milky +olson +proprietor +crambidae +guarantees +intercollegiate +ljubljana +hilda +##sko +ignorant +hooded +##lts +sardinia +##lidae +##vation +frontman +privileged +witchcraft +##gp +jammed +laude +poking +##than +bracket +amazement +yunnan +##erus +maharaja +linnaeus +264 +commissioning +milano +peacefully +##logies +akira +rani +regulator +##36 +grasses +##rance +luzon +crows +compiler +gretchen +seaman +edouard +tab +buccaneers +ellington +hamlets +whig +socialists +##anto +directorial +easton +mythological +##kr +##vary +rhineland +semantic +taut +dune +inventions +succeeds +##iter +replication +branched +##pired +jul +prosecuted +kangaroo +penetrated +##avian +middlesbrough +doses +bleak +madam +predatory +relentless +##vili +reluctance +##vir +hailey +crore +silvery +1759 +monstrous +swimmers +transmissions +hawthorn +informing +##eral +toilets +caracas +crouch +kb +##sett +295 +cartel +hadley +##aling +alexia +yvonne +##biology +cinderella +eton +superb +blizzard +stabbing +industrialist +maximus +##gm +##orus +groves +maud +clade +oversized +comedic +##bella +rosen +nomadic +fulham +montane +beverages +galaxies +redundant +swarm +##rot +##folia +##llis +buckinghamshire +fen +bearings +bahadur +##rom +gilles +phased +dynamite +faber +benoit +vip +##ount +##wd +booking +fractured +tailored +anya +spices +westwood +cairns +auditions +inflammation +steamed +##rocity +##acion +##urne +skyla +thereof +watford +torment +archdeacon +transforms +lulu +demeanor +fucked +serge +##sor +mckenna +minas +entertainer +##icide +caress +originate +residue +##sty +1740 +##ilised +##org +beech +##wana +subsidies +##ghton +emptied +gladstone +ru +firefighters +voodoo +##rcle +het +nightingale +tamara +edmond +ingredient +weaknesses +silhouette +285 +compatibility +withdrawing +hampson +##mona +anguish +giggling +##mber +bookstore +##jiang +southernmost +tilting +##vance +bai +economical +rf +briefcase +dreadful +hinted +projections +shattering +totaling +##rogate +analogue +indicted +periodical +fullback +##dman +haynes +##tenberg +##ffs +##ishment +1745 +thirst +stumble +penang +vigorous +##ddling +##kor +##lium +octave +##ove +##enstein +##inen +##ones +siberian +##uti +cbn +repeal +swaying +##vington +khalid +tanaka +unicorn +otago +plastered +lobe +riddle +##rella +perch +##ishing +croydon +filtered +graeme +tripoli +##ossa +crocodile +##chers +sufi +mined +##tung +inferno +lsu +##phi +swelled +utilizes +£2 +cale +periodicals +styx +hike +informally +coop +lund +##tidae +ala +hen +qui +transformations +disposed +sheath +chickens +##cade +fitzroy +sas +silesia +unacceptable +odisha +1650 +sabrina +pe +spokane +ratios +athena +massage +shen +dilemma +##drum +##riz +##hul +corona +doubtful +niall +##pha +##bino +fines +cite +acknowledging +bangor +ballard 
+bathurst +##resh +huron +mustered +alzheimer +garments +kinase +tyre +warship +##cp +flashback +pulmonary +braun +cheat +kamal +cyclists +constructions +grenades +ndp +traveller +excuses +stomped +signalling +trimmed +futsal +mosques +relevance +##wine +wta +##23 +##vah +##lter +hoc +##riding +optimistic +##´s +deco +sim +interacting +rejecting +moniker +waterways +##ieri +##oku +mayors +gdansk +outnumbered +pearls +##ended +##hampton +fairs +totals +dominating +262 +notions +stairway +compiling +pursed +commodities +grease +yeast +##jong +carthage +griffiths +residual +amc +contraction +laird +sapphire +##marine +##ivated +amalgamation +dissolve +inclination +lyle +packaged +altitudes +suez +canons +graded +lurched +narrowing +boasts +guise +wed +enrico +##ovsky +rower +scarred +bree +cub +iberian +protagonists +bargaining +proposing +trainers +voyages +vans +fishes +##aea +##ivist +##verance +encryption +artworks +kazan +sabre +cleopatra +hepburn +rotting +supremacy +mecklenburg +##brate +burrows +hazards +outgoing +flair +organizes +##ctions +scorpion +##usions +boo +234 +chevalier +dunedin +slapping +##34 +ineligible +pensions +##38 +##omic +manufactures +emails +bismarck +238 +weakening +blackish +ding +mcgee +quo +##rling +northernmost +xx +manpower +greed +sampson +clicking +##ange +##horpe +##inations +##roving +torre +##eptive +##moral +symbolism +38th +asshole +meritorious +outfits +splashed +biographies +sprung +astros +##tale +302 +737 +filly +raoul +nw +tokugawa +linden +clubhouse +##apa +tracts +romano +##pio +putin +tags +##note +chained +dickson +gunshot +moe +gunn +rashid +##tails +zipper +##bas +##nea +contrasted +##ply +##udes +plum +pharaoh +##pile +aw +comedies +ingrid +sandwiches +subdivisions +1100 +mariana +nokia +kamen +hz +delaney +veto +herring +##words +possessive +outlines +##roup +siemens +stairwell +rc +gallantry +messiah +palais +yells +233 +zeppelin +##dm +bolivar +##cede +smackdown +mckinley +##mora +##yt +muted +geologic +finely +unitary +avatar +hamas +maynard +rees +bog +contrasting +##rut +liv +chico +disposition +pixel +##erate +becca +dmitry +yeshiva +narratives +##lva +##ulton +mercenary +sharpe +tempered +navigate +stealth +amassed +keynes +##lini +untouched +##rrie +havoc +lithium +##fighting +abyss +graf +southward +wolverine +balloons +implements +ngos +transitions +##icum +ambushed +concacaf +dormant +economists +##dim +costing +csi +rana +universite +boulders +verity +##llon +collin +mellon +misses +cypress +fluorescent +lifeless +spence +##ulla +crewe +shepard +pak +revelations +##م +jolly +gibbons +paw +##dro +##quel +freeing +##test +shack +fries +palatine +##51 +##hiko +accompaniment +cruising +recycled +##aver +erwin +sorting +synthesizers +dyke +realities +sg +strides +enslaved +wetland +##ghan +competence +gunpowder +grassy +maroon +reactors +objection +##oms +carlson +gearbox +macintosh +radios +shelton +##sho +clergyman +prakash +254 +mongols +trophies +oricon +228 +stimuli +twenty20 +cantonese +cortes +mirrored +##saurus +bhp +cristina +melancholy +##lating +enjoyable +nuevo +##wny +downfall +schumacher +##ind +banging +lausanne +rumbled +paramilitary +reflex +ax +amplitude +migratory +##gall +##ups +midi +barnard +lastly +sherry +##hp +##nall +keystone +##kra +carleton +slippery +##53 +coloring +foe +socket +otter +##rgos +mats +##tose +consultants +bafta +bison +topping +##km +490 +primal +abandonment +transplant +atoll +hideous +mort +pained +reproduced +tae +howling +##turn +unlawful +billionaire +hotter +poised +lansing 
+##chang +dinamo +retro +messing +nfc +domesday +##mina +blitz +timed +##athing +##kley +ascending +gesturing +##izations +signaled +tis +chinatown +mermaid +savanna +jameson +##aint +catalina +##pet +##hers +cochrane +cy +chatting +##kus +alerted +computation +mused +noelle +majestic +mohawk +campo +octagonal +##sant +##hend +241 +aspiring +##mart +comprehend +iona +paralyzed +shimmering +swindon +rhone +##eley +reputed +configurations +pitchfork +agitation +francais +gillian +lipstick +##ilo +outsiders +pontifical +resisting +bitterness +sewer +rockies +##edd +##ucher +misleading +1756 +exiting +galloway +##nging +risked +##heart +246 +commemoration +schultz +##rka +integrating +##rsa +poses +shrieked +##weiler +guineas +gladys +jerking +owls +goldsmith +nightly +penetrating +##unced +lia +##33 +ignited +betsy +##aring +##thorpe +follower +vigorously +##rave +coded +kiran +knit +zoology +tbilisi +##28 +##bered +repository +govt +deciduous +dino +growling +##bba +enhancement +unleashed +chanting +pussy +biochemistry +##eric +kettle +repression +toxicity +nrhp +##arth +##kko +##bush +ernesto +commended +outspoken +242 +mca +parchment +sms +kristen +##aton +bisexual +raked +glamour +navajo +a2 +conditioned +showcased +##hma +spacious +youthful +##esa +usl +appliances +junta +brest +layne +conglomerate +enchanted +chao +loosened +picasso +circulating +inspect +montevideo +##centric +##kti +piazza +spurred +##aith +bari +freedoms +poultry +stamford +lieu +##ect +indigo +sarcastic +bahia +stump +attach +dvds +frankenstein +lille +approx +scriptures +pollen +##script +nmi +overseen +##ivism +tides +proponent +newmarket +inherit +milling +##erland +centralized +##rou +distributors +credentials +drawers +abbreviation +##lco +##xon +downing +uncomfortably +ripe +##oes +erase +franchises +##ever +populace +##bery +##khar +decomposition +pleas +##tet +daryl +sabah +##stle +##wide +fearless +genie +lesions +annette +##ogist +oboe +appendix +nair +dripped +petitioned +maclean +mosquito +parrot +rpg +hampered +1648 +operatic +reservoirs +##tham +irrelevant +jolt +summarized +##fp +medallion +##taff +##− +clawed +harlow +narrower +goddard +marcia +bodied +fremont +suarez +altering +tempest +mussolini +porn +##isms +sweetly +oversees +walkers +solitude +grimly +shrines +hk +ich +supervisors +hostess +dietrich +legitimacy +brushes +expressive +##yp +dissipated +##rse +localized +systemic +##nikov +gettysburg +##js +##uaries +dialogues +muttering +251 +housekeeper +sicilian +discouraged +##frey +beamed +kaladin +halftime +kidnap +##amo +##llet +1754 +synonymous +depleted +instituto +insulin +reprised +##opsis +clashed +##ctric +interrupting +radcliffe +insisting +medici +1715 +ejected +playfully +turbulent +##47 +starvation +##rini +shipment +rebellious +petersen +verification +merits +##rified +cakes +##charged +1757 +milford +shortages +spying +fidelity +##aker +emitted +storylines +harvested +seismic +##iform +cheung +kilda +theoretically +barbie +lynx +##rgy +##tius +goblin +mata +poisonous +##nburg +reactive +residues +obedience +##евич +conjecture +##rac +401 +hating +sixties +kicker +moaning +motown +##bha +emancipation +neoclassical +##hering +consoles +ebert +professorship +##tures +sustaining +assaults +obeyed +affluent +incurred +tornadoes +##eber +##zow +emphasizing +highlanders +cheated +helmets +##ctus +internship +terence +bony +executions +legislators +berries +peninsular +tinged +##aco +1689 +amplifier +corvette +ribbons +lavish +pennant +##lander +worthless +##chfield +##forms +mariano 
+pyrenees +expenditures +##icides +chesterfield +mandir +tailor +39th +sergey +nestled +willed +aristocracy +devotees +goodnight +raaf +rumored +weaponry +remy +appropriations +harcourt +burr +riaa +##lence +limitation +unnoticed +guo +soaking +swamps +##tica +collapsing +tatiana +descriptive +brigham +psalm +##chment +maddox +##lization +patti +caliph +##aja +akron +injuring +serra +##ganj +basins +##sari +astonished +launcher +##church +hilary +wilkins +sewing +##sf +stinging +##fia +##ncia +underwood +startup +##ition +compilations +vibrations +embankment +jurist +##nity +bard +juventus +groundwater +kern +palaces +helium +boca +cramped +marissa +soto +##worm +jae +princely +##ggy +faso +bazaar +warmly +##voking +229 +pairing +##lite +##grate +##nets +wien +freaked +ulysses +rebirth +##alia +##rent +mummy +guzman +jimenez +stilled +##nitz +trajectory +tha +woken +archival +professions +##pts +##pta +hilly +shadowy +shrink +##bolt +norwood +glued +migrate +stereotypes +devoid +##pheus +625 +evacuate +horrors +infancy +gotham +knowles +optic +downloaded +sachs +kingsley +parramatta +darryl +mor +##onale +shady +commence +confesses +kan +##meter +##placed +marlborough +roundabout +regents +frigates +io +##imating +gothenburg +revoked +carvings +clockwise +convertible +intruder +##sche +banged +##ogo +vicky +bourgeois +##mony +dupont +footing +##gum +pd +##real +buckle +yun +penthouse +sane +720 +serviced +stakeholders +neumann +bb +##eers +comb +##gam +catchment +pinning +rallies +typing +##elles +forefront +freiburg +sweetie +giacomo +widowed +goodwill +worshipped +aspirations +midday +##vat +fishery +##trick +bournemouth +turk +243 +hearth +ethanol +guadalajara +murmurs +sl +##uge +afforded +scripted +##hta +wah +##jn +coroner +translucent +252 +memorials +puck +progresses +clumsy +##race +315 +candace +recounted +##27 +##slin +##uve +filtering +##mac +howl +strata +heron +leveled +##ays +dubious +##oja +##т +##wheel +citations +exhibiting +##laya +##mics +##pods +turkic +##lberg +injunction +##ennial +##mit +antibodies +##44 +organise +##rigues +cardiovascular +cushion +inverness +##zquez +dia +cocoa +sibling +##tman +##roid +expanse +feasible +tunisian +algiers +##relli +rus +bloomberg +dso +westphalia +bro +tacoma +281 +downloads +##ours +konrad +duran +##hdi +continuum +jett +compares +legislator +secession +##nable +##gues +##zuka +translating +reacher +##gley +##ła +aleppo +##agi +tc +orchards +trapping +linguist +versatile +drumming +postage +calhoun +superiors +##mx +barefoot +leary +##cis +ignacio +alfa +kaplan +##rogen +bratislava +mori +##vot +disturb +haas +313 +cartridges +gilmore +radiated +salford +tunic +hades +##ulsive +archeological +delilah +magistrates +auditioned +brewster +charters +empowerment +blogs +cappella +dynasties +iroquois +whipping +##krishna +raceway +truths +myra +weaken +judah +mcgregor +##horse +mic +refueling +37th +burnley +bosses +markus +premio +query +##gga +dunbar +##economic +darkest +lyndon +sealing +commendation +reappeared +##mun +addicted +ezio +slaughtered +satisfactory +shuffle +##eves +##thic +##uj +fortification +warrington +##otto +resurrected +fargo +mane +##utable +##lei +##space +foreword +ox +##aris +##vern +abrams +hua +##mento +sakura +##alo +uv +sentimental +##skaya +midfield +##eses +sturdy +scrolls +macleod +##kyu +entropy +##lance +mitochondrial +cicero +excelled +thinner +convoys +perceive +##oslav +##urable +systematically +grind +burkina +287 +##tagram +ops +##aman +guantanamo +##cloth +##tite +forcefully +wavy +##jou 
+pointless +##linger +##tze +layton +portico +superficial +clerical +outlaws +##hism +burials +muir +##inn +creditors +hauling +rattle +##leg +calais +monde +archers +reclaimed +dwell +wexford +hellenic +falsely +remorse +##tek +dough +furnishings +##uttered +gabon +neurological +novice +##igraphy +contemplated +pulpit +nightstand +saratoga +##istan +documenting +pulsing +taluk +##firmed +busted +marital +##rien +disagreements +wasps +##yes +hodge +mcdonnell +mimic +fran +pendant +dhabi +musa +##nington +congratulations +argent +darrell +concussion +losers +regrets +thessaloniki +reversal +donaldson +hardwood +thence +achilles +ritter +##eran +demonic +jurgen +prophets +goethe +eki +classmate +buff +##cking +yank +irrational +##inging +perished +seductive +qur +sourced +##crat +##typic +mustard +ravine +barre +horizontally +characterization +phylogenetic +boise +##dit +##runner +##tower +brutally +intercourse +seduce +##bbing +fay +ferris +ogden +amar +nik +unarmed +##inator +evaluating +kyrgyzstan +sweetness +##lford +##oki +mccormick +meiji +notoriety +stimulate +disrupt +figuring +instructional +mcgrath +##zoo +groundbreaking +##lto +flinch +khorasan +agrarian +bengals +mixer +radiating +##sov +ingram +pitchers +nad +tariff +##cript +tata +##codes +##emi +##ungen +appellate +lehigh +##bled +##giri +brawl +duct +texans +##ciation +##ropolis +skipper +speculative +vomit +doctrines +stresses +253 +davy +graders +whitehead +jozef +timely +cumulative +haryana +paints +appropriately +boon +cactus +##ales +##pid +dow +legions +##pit +perceptions +1730 +picturesque +##yse +periphery +rune +wr +##aha +celtics +sentencing +whoa +##erin +confirms +variance +425 +moines +mathews +spade +rave +m1 +fronted +fx +blending +alleging +reared +##gl +237 +##paper +grassroots +eroded +##free +##physical +directs +ordeal +##sław +accelerate +hacker +rooftop +##inia +lev +buys +cebu +devote +##lce +specialising +##ulsion +choreographed +repetition +warehouses +##ryl +paisley +tuscany +analogy +sorcerer +hash +huts +shards +descends +exclude +nix +chaplin +gaga +ito +vane +##drich +causeway +misconduct +limo +orchestrated +glands +jana +##kot +u2 +##mple +##sons +branching +contrasts +scoop +longed +##virus +chattanooga +##75 +syrup +cornerstone +##tized +##mind +##iaceae +careless +precedence +frescoes +##uet +chilled +consult +modelled +snatch +peat +##thermal +caucasian +humane +relaxation +spins +temperance +##lbert +occupations +lambda +hybrids +moons +mp3 +##oese +247 +rolf +societal +yerevan +ness +##ssler +befriended +mechanized +nominate +trough +boasted +cues +seater +##hom +bends +##tangle +conductors +emptiness +##lmer +eurasian +adriatic +tian +##cie +anxiously +lark +propellers +chichester +jock +ev +2a +##holding +credible +recounts +tori +loyalist +abduction +##hoot +##redo +nepali +##mite +ventral +tempting +##ango +##crats +steered +##wice +javelin +dipping +laborers +prentice +looming +titanium +##ː +badges +emir +tensor +##ntation +egyptians +rash +denies +hawthorne +lombard +showers +wehrmacht +dietary +trojan +##reus +welles +executing +horseshoe +lifeboat +##lak +elsa +infirmary +nearing +roberta +boyer +mutter +trillion +joanne +##fine +##oked +sinks +vortex +uruguayan +clasp +sirius +##block +accelerator +prohibit +sunken +byu +chronological +diplomats +ochreous +510 +symmetrical +1644 +maia +##tology +salts +reigns +atrocities +##ия +hess +bared +issn +##vyn +cater +saturated +##cycle +##isse +sable +voyager +dyer +yusuf +##inge +fountains +wolff +##39 +##nni +engraving +rollins 
+atheist +ominous +##ault +herr +chariot +martina +strung +##fell +##farlane +horrific +sahib +gazes +saetan +erased +ptolemy +##olic +flushing +lauderdale +analytic +##ices +530 +navarro +beak +gorilla +herrera +broom +guadalupe +raiding +sykes +311 +bsc +deliveries +1720 +invasions +carmichael +tajikistan +thematic +ecumenical +sentiments +onstage +##rians +##brand +##sume +catastrophic +flanks +molten +##arns +waller +aimee +terminating +##icing +alternately +##oche +nehru +printers +outraged +##eving +empires +template +banners +repetitive +za +##oise +vegetarian +##tell +guiana +opt +cavendish +lucknow +synthesized +##hani +##mada +finalized +##ctable +fictitious +mayoral +unreliable +##enham +embracing +peppers +rbis +##chio +##neo +inhibition +slashed +togo +orderly +embroidered +safari +salty +236 +barron +benito +totaled +##dak +pubs +simulated +caden +devin +tolkien +momma +welding +sesame +##ept +gottingen +hardness +630 +shaman +temeraire +620 +adequately +pediatric +##kit +ck +assertion +radicals +composure +cadence +seafood +beaufort +lazarus +mani +warily +cunning +kurdistan +249 +cantata +##kir +ares +##41 +##clusive +nape +townland +geared +insulted +flutter +boating +violate +draper +dumping +malmo +##hh +##romatic +firearm +alta +bono +obscured +##clave +exceeds +panorama +unbelievable +##train +preschool +##essed +disconnected +installing +rescuing +secretaries +accessibility +##castle +##drive +##ifice +##film +bouts +slug +waterway +mindanao +##buro +##ratic +halves +##ل +calming +liter +maternity +adorable +bragg +electrification +mcc +##dote +roxy +schizophrenia +##body +munoz +kaye +whaling +239 +mil +tingling +tolerant +##ago +unconventional +volcanoes +##finder +deportivo +##llie +robson +kaufman +neuroscience +wai +deportation +masovian +scraping +converse +##bh +hacking +bulge +##oun +administratively +yao +580 +amp +mammoth +booster +claremont +hooper +nomenclature +pursuits +mclaughlin +melinda +##sul +catfish +barclay +substrates +taxa +zee +originals +kimberly +packets +padma +##ality +borrowing +ostensibly +solvent +##bri +##genesis +##mist +lukas +shreveport +veracruz +##ь +##lou +##wives +cheney +tt +anatolia +hobbs +##zyn +cyclic +radiant +alistair +greenish +siena +dat +independents +##bation +conform +pieter +hyper +applicant +bradshaw +spores +telangana +vinci +inexpensive +nuclei +322 +jang +nme +soho +spd +##ign +cradled +receptionist +pow +##43 +##rika +fascism +##ifer +experimenting +##ading +##iec +##region +345 +jocelyn +maris +stair +nocturnal +toro +constabulary +elgin +##kker +msc +##giving +##schen +##rase +doherty +doping +sarcastically +batter +maneuvers +##cano +##apple +##gai +##git +intrinsic +##nst +##stor +1753 +showtime +cafes +gasps +lviv +ushered +##thed +fours +restart +astonishment +transmitting +flyer +shrugs +##sau +intriguing +cones +dictated +mushrooms +medial +##kovsky +##elman +escorting +gaped +##26 +godfather +##door +##sell +djs +recaptured +timetable +vila +1710 +3a +aerodrome +mortals +scientology +##orne +angelina +mag +convection +unpaid +insertion +intermittent +lego +##nated +endeavor +kota +pereira +##lz +304 +bwv +glamorgan +insults +agatha +fey +##cend +fleetwood +mahogany +protruding +steamship +zeta +##arty +mcguire +suspense +##sphere +advising +urges +##wala +hurriedly +meteor +gilded +inline +arroyo +stalker +##oge +excitedly +revered +##cure +earle +introductory +##break +##ilde +mutants +puff +pulses +reinforcement +##haling +curses +lizards +stalk +correlated +##fixed +fallout +macquarie +##unas 
+bearded +denton +heaving +802 +##ocation +winery +assign +dortmund +##lkirk +everest +invariant +charismatic +susie +##elling +bled +lesley +telegram +sumner +bk +##ogen +##к +wilcox +needy +colbert +duval +##iferous +##mbled +allotted +attends +imperative +##hita +replacements +hawker +##inda +insurgency +##zee +##eke +casts +##yla +680 +ives +transitioned +##pack +##powering +authoritative +baylor +flex +cringed +plaintiffs +woodrow +##skie +drastic +ape +aroma +unfolded +commotion +nt +preoccupied +theta +routines +lasers +privatization +wand +domino +ek +clenching +nsa +strategically +showered +bile +handkerchief +pere +storing +christophe +insulting +316 +nakamura +romani +asiatic +magdalena +palma +cruises +stripping +405 +konstantin +soaring +##berman +colloquially +forerunner +havilland +incarcerated +parasites +sincerity +##utus +disks +plank +saigon +##ining +corbin +homo +ornaments +powerhouse +##tlement +chong +fastened +feasibility +idf +morphological +usable +##nish +##zuki +aqueduct +jaguars +keepers +##flies +aleksandr +faust +assigns +ewing +bacterium +hurled +tricky +hungarians +integers +wallis +321 +yamaha +##isha +hushed +oblivion +aviator +evangelist +friars +##eller +monograph +ode +##nary +airplanes +labourers +charms +##nee +1661 +hagen +tnt +rudder +fiesta +transcript +dorothea +ska +inhibitor +maccabi +retorted +raining +encompassed +clauses +menacing +1642 +lineman +##gist +vamps +##ape +##dick +gloom +##rera +dealings +easing +seekers +##nut +##pment +helens +unmanned +##anu +##isson +basics +##amy +##ckman +adjustments +1688 +brutality +horne +##zell +sui +##55 +##mable +aggregator +##thal +rhino +##drick +##vira +counters +zoom +##01 +##rting +mn +montenegrin +packard +##unciation +##♭ +##kki +reclaim +scholastic +thugs +pulsed +##icia +syriac +quan +saddam +banda +kobe +blaming +buddies +dissent +##lusion +##usia +corbett +jaya +delle +erratic +lexie +##hesis +435 +amiga +hermes +##pressing +##leen +chapels +gospels +jamal +##uating +compute +revolving +warp +##sso +##thes +armory +##eras +##gol +antrim +loki +##kow +##asian +##good +##zano +braid +handwriting +subdistrict +funky +pantheon +##iculate +concurrency +estimation +improper +juliana +##his +newcomers +johnstone +staten +communicated +##oco +##alle +sausage +stormy +##stered +##tters +superfamily +##grade +acidic +collateral +tabloid +##oped +##rza +bladder +austen +##ellant +mcgraw +##hay +hannibal +mein +aquino +lucifer +wo +badger +boar +cher +christensen +greenberg +interruption +##kken +jem +244 +mocked +bottoms +cambridgeshire +##lide +sprawling +##bbly +eastwood +ghent +synth +##buck +advisers +##bah +nominally +hapoel +qu +daggers +estranged +fabricated +towels +vinnie +wcw +misunderstanding +anglia +nothin +unmistakable +##dust +##lova +chilly +marquette +truss +##edge +##erine +reece +##lty +##chemist +##connected +272 +308 +41st +bash +raion +waterfalls +##ump +##main +labyrinth +queue +theorist +##istle +bharatiya +flexed +soundtracks +rooney +leftist +patrolling +wharton +plainly +alleviate +eastman +schuster +topographic +engages +immensely +unbearable +fairchild +1620 +dona +lurking +parisian +oliveira +ia +indictment +hahn +bangladeshi +##aster +vivo +##uming +##ential +antonia +expects +indoors +kildare +harlan +##logue +##ogenic +##sities +forgiven +##wat +childish +tavi +##mide +##orra +plausible +grimm +successively +scooted +##bola +##dget +##rith +spartans +emery +flatly +azure +epilogue +##wark +flourish +##iny +##tracted +##overs +##oshi +bestseller +distressed +receipt 
+spitting +hermit +topological +##cot +drilled +subunit +francs +##layer +eel +##fk +##itas +octopus +footprint +petitions +ufo +##say +##foil +interfering +leaking +palo +##metry +thistle +valiant +##pic +narayan +mcpherson +##fast +gonzales +##ym +##enne +dustin +novgorod +solos +##zman +doin +##raph +##patient +##meyer +soluble +ashland +cuffs +carole +pendleton +whistling +vassal +##river +deviation +revisited +constituents +rallied +rotate +loomed +##eil +##nting +amateurs +augsburg +auschwitz +crowns +skeletons +##cona +bonnet +257 +dummy +globalization +simeon +sleeper +mandal +differentiated +##crow +##mare +milne +bundled +exasperated +talmud +owes +segregated +##feng +##uary +dentist +piracy +props +##rang +devlin +##torium +malicious +paws +##laid +dependency +##ergy +##fers +##enna +258 +pistons +rourke +jed +grammatical +tres +maha +wig +512 +ghostly +jayne +##achal +##creen +##ilis +##lins +##rence +designate +##with +arrogance +cambodian +clones +showdown +throttle +twain +##ception +lobes +metz +nagoya +335 +braking +##furt +385 +roaming +##minster +amin +crippled +##37 +##llary +indifferent +hoffmann +idols +intimidating +1751 +261 +influenza +memo +onions +1748 +bandage +consciously +##landa +##rage +clandestine +observes +swiped +tangle +##ener +##jected +##trum +##bill +##lta +hugs +congresses +josiah +spirited +##dek +humanist +managerial +filmmaking +inmate +rhymes +debuting +grimsby +ur +##laze +duplicate +vigor +##tf +republished +bolshevik +refurbishment +antibiotics +martini +methane +newscasts +royale +horizons +levant +iain +visas +##ischen +paler +##around +manifestation +snuck +alf +chop +futile +pedestal +rehab +##kat +bmg +kerman +res +fairbanks +jarrett +abstraction +saharan +##zek +1746 +procedural +clearer +kincaid +sash +luciano +##ffey +crunch +helmut +##vara +revolutionaries +##tute +creamy +leach +##mmon +1747 +permitting +nes +plight +wendell +##lese +contra +ts +clancy +ipa +mach +staples +autopsy +disturbances +nueva +karin +pontiac +##uding +proxy +venerable +haunt +leto +bergman +expands +##helm +wal +##pipe +canning +celine +cords +obesity +##enary +intrusion +planner +##phate +reasoned +sequencing +307 +harrow +##chon +##dora +marred +mcintyre +repay +tarzan +darting +248 +harrisburg +margarita +repulsed +##hur +##lding +belinda +hamburger +novo +compliant +runways +bingham +registrar +skyscraper +ic +cuthbert +improvisation +livelihood +##corp +##elial +admiring +##dened +sporadic +believer +casablanca +popcorn +##29 +asha +shovel +##bek +##dice +coiled +tangible +##dez +casper +elsie +resin +tenderness +rectory +##ivision +avail +sonar +##mori +boutique +##dier +guerre +bathed +upbringing +vaulted +sandals +blessings +##naut +##utnant +1680 +306 +foxes +pia +corrosion +hesitantly +confederates +crystalline +footprints +shapiro +tirana +valentin +drones +45th +microscope +shipments +texted +inquisition +wry +guernsey +unauthorized +resigning +760 +ripple +schubert +stu +reassure +felony +##ardo +brittle +koreans +##havan +##ives +dun +implicit +tyres +##aldi +##lth +magnolia +##ehan +##puri +##poulos +aggressively +fei +gr +familiarity +##poo +indicative +##trust +fundamentally +jimmie +overrun +395 +anchors +moans +##opus +britannia +armagh +##ggle +purposely +seizing +##vao +bewildered +mundane +avoidance +cosmopolitan +geometridae +quartermaster +caf +415 +chatter +engulfed +gleam +purge +##icate +juliette +jurisprudence +guerra +revisions +##bn +casimir +brew +##jm +1749 +clapton +cloudy +conde +hermitage +278 +simulations +torches 
+vincenzo +matteo +##rill +hidalgo +booming +westbound +accomplishment +tentacles +unaffected +##sius +annabelle +flopped +sloping +##litz +dreamer +interceptor +vu +##loh +consecration +copying +messaging +breaker +climates +hospitalized +1752 +torino +afternoons +winfield +witnessing +##teacher +breakers +choirs +sawmill +coldly +##ege +sipping +haste +uninhabited +conical +bibliography +pamphlets +severn +edict +##oca +deux +illnesses +grips +##pl +rehearsals +sis +thinkers +tame +##keepers +1690 +acacia +reformer +##osed +##rys +shuffling +##iring +##shima +eastbound +ionic +rhea +flees +littered +##oum +rocker +vomiting +groaning +champ +overwhelmingly +civilizations +paces +sloop +adoptive +##tish +skaters +##vres +aiding +mango +##joy +nikola +shriek +##ignon +pharmaceuticals +##mg +tuna +calvert +gustavo +stocked +yearbook +##urai +##mana +computed +subsp +riff +hanoi +kelvin +hamid +moors +pastures +summons +jihad +nectar +##ctors +bayou +untitled +pleasing +vastly +republics +intellect +##η +##ulio +##tou +crumbling +stylistic +sb +##ی +consolation +frequented +h₂o +walden +widows +##iens +404 +##ignment +chunks +improves +288 +grit +recited +##dev +snarl +sociological +##arte +##gul +inquired +##held +bruise +clube +consultancy +homogeneous +hornets +multiplication +pasta +prick +savior +##grin +##kou +##phile +yoon +##gara +grimes +vanishing +cheering +reacting +bn +distillery +##quisite +##vity +coe +dockyard +massif +##jord +escorts +voss +##valent +byte +chopped +hawke +illusions +workings +floats +##koto +##vac +kv +annapolis +madden +##onus +alvaro +noctuidae +##cum +##scopic +avenge +steamboat +forte +illustrates +erika +##trip +570 +dew +nationalities +bran +manifested +thirsty +diversified +muscled +reborn +##standing +arson +##lessness +##dran +##logram +##boys +##kushima +##vious +willoughby +##phobia +286 +alsace +dashboard +yuki +##chai +granville +myspace +publicized +tricked +##gang +adjective +##ater +relic +reorganisation +enthusiastically +indications +saxe +##lassified +consolidate +iec +padua +helplessly +ramps +renaming +regulars +pedestrians +accents +convicts +inaccurate +lowers +mana +##pati +barrie +bjp +outta +someplace +berwick +flanking +invoked +marrow +sparsely +excerpts +clothed +rei +##ginal +wept +##straße +##vish +alexa +excel +##ptive +membranes +aquitaine +creeks +cutler +sheppard +implementations +ns +##dur +fragrance +budge +concordia +magnesium +marcelo +##antes +gladly +vibrating +##rral +##ggles +montrose +##omba +lew +seamus +1630 +cocky +##ament +##uen +bjorn +##rrick +fielder +fluttering +##lase +methyl +kimberley +mcdowell +reductions +barbed +##jic +##tonic +aeronautical +condensed +distracting +##promising +huffed +##cala +##sle +claudius +invincible +missy +pious +balthazar +ci +##lang +butte +combo +orson +##dication +myriad +1707 +silenced +##fed +##rh +coco +netball +yourselves +##oza +clarify +heller +peg +durban +etudes +offender +roast +blackmail +curvature +##woods +vile +309 +illicit +suriname +##linson +overture +1685 +bubbling +gymnast +tucking +##mming +##ouin +maldives +##bala +gurney +##dda +##eased +##oides +backside +pinto +jars +racehorse +tending +##rdial +baronetcy +wiener +duly +##rke +barbarian +cupping +flawed +##thesis +bertha +pleistocene +puddle +swearing +##nob +##tically +fleeting +prostate +amulet +educating +##mined +##iti +##tler +75th +jens +respondents +analytics +cavaliers +papacy +raju +##iente +##ulum +##tip +funnel +271 +disneyland +##lley +sociologist +##iam +2500 +faulkner +louvre +menon +##dson 
+276 +##ower +afterlife +mannheim +peptide +referees +comedians +meaningless +##anger +##laise +fabrics +hurley +renal +sleeps +##bour +##icle +breakout +kristin +roadside +animator +clover +disdain +unsafe +redesign +##urity +firth +barnsley +portage +reset +narrows +268 +commandos +expansive +speechless +tubular +##lux +essendon +eyelashes +smashwords +##yad +##bang +##claim +craved +sprinted +chet +somme +astor +wrocław +orton +266 +bane +##erving +##uing +mischief +##amps +##sund +scaling +terre +##xious +impairment +offenses +undermine +moi +soy +contiguous +arcadia +inuit +seam +##tops +macbeth +rebelled +##icative +##iot +590 +elaborated +frs +uniformed +##dberg +259 +powerless +priscilla +stimulated +980 +qc +arboretum +frustrating +trieste +bullock +##nified +enriched +glistening +intern +##adia +locus +nouvelle +ollie +ike +lash +starboard +ee +tapestry +headlined +hove +rigged +##vite +pollock +##yme +thrive +clustered +cas +roi +gleamed +olympiad +##lino +pressured +regimes +##hosis +##lick +ripley +##ophone +kickoff +gallon +rockwell +##arable +crusader +glue +revolutions +scrambling +1714 +grover +##jure +englishman +aztec +263 +contemplating +coven +ipad +preach +triumphant +tufts +##esian +rotational +##phus +328 +falkland +##brates +strewn +clarissa +rejoin +environmentally +glint +banded +drenched +moat +albanians +johor +rr +maestro +malley +nouveau +shaded +taxonomy +v6 +adhere +bunk +airfields +##ritan +1741 +encompass +remington +tran +##erative +amelie +mazda +friar +morals +passions +##zai +breadth +vis +##hae +argus +burnham +caressing +insider +rudd +##imov +##mini +##rso +italianate +murderous +textual +wainwright +armada +bam +weave +timer +##taken +##nh +fra +##crest +ardent +salazar +taps +tunis +##ntino +allegro +gland +philanthropic +##chester +implication +##optera +esq +judas +noticeably +wynn +##dara +inched +indexed +crises +villiers +bandit +royalties +patterned +cupboard +interspersed +accessory +isla +kendrick +entourage +stitches +##esthesia +headwaters +##ior +interlude +distraught +draught +1727 +##basket +biased +sy +transient +triad +subgenus +adapting +kidd +shortstop +##umatic +dimly +spiked +mcleod +reprint +nellie +pretoria +windmill +##cek +singled +##mps +273 +reunite +##orous +747 +bankers +outlying +##omp +##ports +##tream +apologies +cosmetics +patsy +##deh +##ocks +##yson +bender +nantes +serene +##nad +lucha +mmm +323 +##cius +##gli +cmll +coinage +nestor +juarez +##rook +smeared +sprayed +twitching +sterile +irina +embodied +juveniles +enveloped +miscellaneous +cancers +dq +gulped +luisa +crested +swat +donegal +ref +##anov +##acker +hearst +mercantile +##lika +doorbell +ua +vicki +##alla +##som +bilbao +psychologists +stryker +sw +horsemen +turkmenistan +wits +##national +anson +mathew +screenings +##umb +rihanna +##agne +##nessy +aisles +##iani +##osphere +hines +kenton +saskatoon +tasha +truncated +##champ +##itan +mildred +advises +fredrik +interpreting +inhibitors +##athi +spectroscopy +##hab +##kong +karim +panda +##oia +##nail +##vc +conqueror +kgb +leukemia +##dity +arrivals +cheered +pisa +phosphorus +shielded +##riated +mammal +unitarian +urgently +chopin +sanitary +##mission +spicy +drugged +hinges +##tort +tipping +trier +impoverished +westchester +##caster +267 +epoch +nonstop +##gman +##khov +aromatic +centrally +cerro +##tively +##vio +billions +modulation +sedimentary +283 +facilitating +outrageous +goldstein +##eak +##kt +ld +maitland +penultimate +pollard +##dance +fleets +spaceship +vertebrae +##nig +alcoholism +als 
+recital +##bham +##ference +##omics +m2 +##bm +trois +##tropical +##в +commemorates +##meric +marge +##raction +1643 +670 +cosmetic +ravaged +##ige +catastrophe +eng +##shida +albrecht +arterial +bellamy +decor +harmon +##rde +bulbs +synchronized +vito +easiest +shetland +shielding +wnba +##glers +##ssar +##riam +brianna +cumbria +##aceous +##rard +cores +thayer +##nsk +brood +hilltop +luminous +carts +keynote +larkin +logos +##cta +##ا +##mund +##quay +lilith +tinted +277 +wrestle +mobilization +##uses +sequential +siam +bloomfield +takahashi +274 +##ieving +presenters +ringo +blazed +witty +##oven +##ignant +devastation +haydn +harmed +newt +therese +##peed +gershwin +molina +rabbis +sudanese +001 +innate +restarted +##sack +##fus +slices +wb +##shah +enroll +hypothetical +hysterical +1743 +fabio +indefinite +warped +##hg +exchanging +525 +unsuitable +##sboro +gallo +1603 +bret +cobalt +homemade +##hunter +mx +operatives +##dhar +terraces +durable +latch +pens +whorls +##ctuated +##eaux +billing +ligament +succumbed +##gly +regulators +spawn +##brick +##stead +filmfare +rochelle +##nzo +1725 +circumstance +saber +supplements +##nsky +##tson +crowe +wellesley +carrot +##9th +##movable +primate +drury +sincerely +topical +##mad +##rao +callahan +kyiv +smarter +tits +undo +##yeh +announcements +anthologies +barrio +nebula +##islaus +##shaft +##tyn +bodyguards +2021 +assassinate +barns +emmett +scully +##mah +##yd +##eland +##tino +##itarian +demoted +gorman +lashed +prized +adventist +writ +##gui +alla +invertebrates +##ausen +1641 +amman +1742 +align +healy +redistribution +##gf +##rize +insulation +##drop +adherents +hezbollah +vitro +ferns +yanking +269 +php +registering +uppsala +cheerleading +confines +mischievous +tully +##ross +49th +docked +roam +stipulated +pumpkin +##bry +prompt +##ezer +blindly +shuddering +craftsmen +frail +scented +katharine +scramble +shaggy +sponge +helix +zaragoza +279 +##52 +43rd +backlash +fontaine +seizures +posse +cowan +nonfiction +telenovela +wwii +hammered +undone +##gpur +encircled +irs +##ivation +artefacts +oneself +searing +smallpox +##belle +##osaurus +shandong +breached +upland +blushing +rankin +infinitely +psyche +tolerated +docking +evicted +##col +unmarked +##lving +gnome +lettering +litres +musique +##oint +benevolent +##jal +blackened +##anna +mccall +racers +tingle +##ocene +##orestation +introductions +radically +292 +##hiff +##باد +1610 +1739 +munchen +plead +##nka +condo +scissors +##sight +##tens +apprehension +##cey +##yin +hallmark +watering +formulas +sequels +##llas +aggravated +bae +commencing +##building +enfield +prohibits +marne +vedic +civilized +euclidean +jagger +beforehand +blasts +dumont +##arney +##nem +740 +conversions +hierarchical +rios +simulator +##dya +##lellan +hedges +oleg +thrusts +shadowed +darby +maximize +1744 +gregorian +##nded +##routed +sham +unspecified +##hog +emory +factual +##smo +##tp +fooled +##rger +ortega +wellness +marlon +##oton +##urance +casket +keating +ley +enclave +##ayan +char +influencing +jia +##chenko +412 +ammonia +erebidae +incompatible +violins +cornered +##arat +grooves +astronauts +columbian +rampant +fabrication +kyushu +mahmud +vanish +##dern +mesopotamia +##lete +ict +##rgen +caspian +kenji +pitted +##vered +999 +grimace +roanoke +tchaikovsky +twinned +##analysis +##awan +xinjiang +arias +clemson +kazakh +sizable +1662 +##khand +##vard +plunge +tatum +vittorio +##nden +cholera +##dana +##oper +bracing +indifference +projectile +superliga +##chee +realises +upgrading +299 +porte 
+retribution +##vies +nk +stil +##resses +ama +bureaucracy +blackberry +bosch +testosterone +collapses +greer +##pathic +ioc +fifties +malls +##erved +bao +baskets +adolescents +siegfried +##osity +##tosis +mantra +detecting +existent +fledgling +##cchi +dissatisfied +gan +telecommunication +mingled +sobbed +6000 +controversies +outdated +taxis +##raus +fright +slams +##lham +##fect +##tten +detectors +fetal +tanned +##uw +fray +goth +olympian +skipping +mandates +scratches +sheng +unspoken +hyundai +tracey +hotspur +restrictive +##buch +americana +mundo +##bari +burroughs +diva +vulcan +##6th +distinctions +thumping +##ngen +mikey +sheds +fide +rescues +springsteen +vested +valuation +##ece +##ely +pinnacle +rake +sylvie +##edo +almond +quivering +##irus +alteration +faltered +##wad +51st +hydra +ticked +##kato +recommends +##dicated +antigua +arjun +stagecoach +wilfred +trickle +pronouns +##pon +aryan +nighttime +##anian +gall +pea +stitch +##hei +leung +milos +##dini +eritrea +nexus +starved +snowfall +kant +parasitic +cot +discus +hana +strikers +appleton +kitchens +##erina +##partisan +##itha +##vius +disclose +metis +##channel +1701 +tesla +##vera +fitch +1735 +blooded +##tila +decimal +##tang +##bai +cyclones +eun +bottled +peas +pensacola +basha +bolivian +crabs +boil +lanterns +partridge +roofed +1645 +necks +##phila +opined +patting +##kla +##lland +chuckles +volta +whereupon +##nche +devout +euroleague +suicidal +##dee +inherently +involuntary +knitting +nasser +##hide +puppets +colourful +courageous +southend +stills +miraculous +hodgson +richer +rochdale +ethernet +greta +uniting +prism +umm +##haya +##itical +##utation +deterioration +pointe +prowess +##ropriation +lids +scranton +billings +subcontinent +##koff +##scope +brute +kellogg +psalms +degraded +##vez +stanisław +##ructured +ferreira +pun +astonishing +gunnar +##yat +arya +prc +gottfried +##tight +excursion +##ographer +dina +##quil +##nare +huffington +illustrious +wilbur +gundam +verandah +##zard +naacp +##odle +constructive +fjord +kade +##naud +generosity +thrilling +baseline +cayman +frankish +plastics +accommodations +zoological +##fting +cedric +qb +motorized +##dome +##otted +squealed +tackled +canucks +budgets +situ +asthma +dail +gabled +grasslands +whimpered +writhing +judgments +##65 +minnie +pv +##carbon +bananas +grille +domes +monique +odin +maguire +markham +tierney +##estra +##chua +libel +poke +speedy +atrium +laval +notwithstanding +##edly +fai +kala +##sur +robb +##sma +listings +luz +supplementary +tianjin +##acing +enzo +jd +ric +scanner +croats +transcribed +##49 +arden +cv +##hair +##raphy +##lver +##uy +357 +seventies +staggering +alam +horticultural +hs +regression +timbers +blasting +##ounded +montagu +manipulating +##cit +catalytic +1550 +troopers +##meo +condemnation +fitzpatrick +##oire +##roved +inexperienced +1670 +castes +##lative +outing +314 +dubois +flicking +quarrel +ste +learners +1625 +iq +whistled +##class +282 +classify +tariffs +temperament +355 +folly +liszt +##yles +immersed +jordanian +ceasefire +apparel +extras +maru +fished +##bio +harta +stockport +assortment +craftsman +paralysis +transmitters +##cola +blindness +##wk +fatally +proficiency +solemnly +##orno +repairing +amore +groceries +ultraviolet +##chase +schoolhouse +##tua +resurgence +nailed +##otype +##× +ruse +saliva +diagrams +##tructing +albans +rann +thirties +1b +antennas +hilarious +cougars +paddington +stats +##eger +breakaway +ipod +reza +authorship +prohibiting +scoffed +##etz +##ttle +conscription 
+defected +trondheim +##fires +ivanov +keenan +##adan +##ciful +##fb +##slow +locating +##ials +##tford +cadiz +basalt +blankly +interned +rags +rattling +##tick +carpathian +reassured +sync +bum +guildford +iss +staunch +##onga +astronomers +sera +sofie +emergencies +susquehanna +##heard +duc +mastery +vh1 +williamsburg +bayer +buckled +craving +##khan +##rdes +bloomington +##write +alton +barbecue +##bians +justine +##hri +##ndt +delightful +smartphone +newtown +photon +retrieval +peugeot +hissing +##monium +##orough +flavors +lighted +relaunched +tainted +##games +##lysis +anarchy +microscopic +hopping +adept +evade +evie +##beau +inhibit +sinn +adjustable +hurst +intuition +wilton +cisco +44th +lawful +lowlands +stockings +thierry +##dalen +##hila +##nai +fates +prank +tb +maison +lobbied +provocative +1724 +4a +utopia +##qual +carbonate +gujarati +purcell +##rford +curtiss +##mei +overgrown +arenas +mediation +swallows +##rnik +respectful +turnbull +##hedron +##hope +alyssa +ozone +##ʻi +ami +gestapo +johansson +snooker +canteen +cuff +declines +empathy +stigma +##ags +##iner +##raine +taxpayers +gui +volga +##wright +##copic +lifespan +overcame +tattooed +enactment +giggles +##ador +##camp +barrington +bribe +obligatory +orbiting +peng +##enas +elusive +sucker +##vating +cong +hardship +empowered +anticipating +estrada +cryptic +greasy +detainees +planck +sudbury +plaid +dod +marriott +kayla +##ears +##vb +##zd +mortally +##hein +cognition +radha +319 +liechtenstein +meade +richly +argyle +harpsichord +liberalism +trumpets +lauded +tyrant +salsa +tiled +lear +promoters +reused +slicing +trident +##chuk +##gami +##lka +cantor +checkpoint +##points +gaul +leger +mammalian +##tov +##aar +##schaft +doha +frenchman +nirvana +##vino +delgado +headlining +##eron +##iography +jug +tko +1649 +naga +intersections +##jia +benfica +nawab +##suka +ashford +gulp +##deck +##vill +##rug +brentford +frazier +pleasures +dunne +potsdam +shenzhen +dentistry +##tec +flanagan +##dorff +##hear +chorale +dinah +prem +quezon +##rogated +relinquished +sutra +terri +##pani +flaps +##rissa +poly +##rnet +homme +aback +##eki +linger +womb +##kson +##lewood +doorstep +orthodoxy +threaded +westfield +##rval +dioceses +fridays +subsided +##gata +loyalists +##biotic +##ettes +letterman +lunatic +prelate +tenderly +invariably +souza +thug +winslow +##otide +furlongs +gogh +jeopardy +##runa +pegasus +##umble +humiliated +standalone +tagged +##roller +freshmen +klan +##bright +attaining +initiating +transatlantic +logged +viz +##uance +1723 +combatants +intervening +stephane +chieftain +despised +grazed +317 +cdc +galveston +godzilla +macro +simulate +##planes +parades +##esses +960 +##ductive +##unes +equator +overdose +##cans +##hosh +##lifting +joshi +epstein +sonora +treacherous +aquatics +manchu +responsive +##sation +supervisory +##christ +##llins +##ibar +##balance +##uso +kimball +karlsruhe +mab +##emy +ignores +phonetic +reuters +spaghetti +820 +almighty +danzig +rumbling +tombstone +designations +lured +outset +##felt +supermarkets +##wt +grupo +kei +kraft +susanna +##blood +comprehension +genealogy +##aghan +##verted +redding +##ythe +1722 +bowing +##pore +##roi +lest +sharpened +fulbright +valkyrie +sikhs +##unds +swans +bouquet +merritt +##tage +##venting +commuted +redhead +clerks +leasing +cesare +dea +hazy +##vances +fledged +greenfield +servicemen +##gical +armando +blackout +dt +sagged +downloadable +intra +potion +pods +##4th +##mism +xp +attendants +gambia +stale +##ntine +plump +asteroids 
+rediscovered +buds +flea +hive +##neas +1737 +classifications +debuts +##eles +olympus +scala +##eurs +##gno +##mute +hummed +sigismund +visuals +wiggled +await +pilasters +clench +sulfate +##ances +bellevue +enigma +trainee +snort +##sw +clouded +denim +##rank +##rder +churning +hartman +lodges +riches +sima +##missible +accountable +socrates +regulates +mueller +##cr +1702 +avoids +solids +himalayas +nutrient +pup +##jevic +squat +fades +nec +##lates +##pina +##rona +##ου +privateer +tequila +##gative +##mpton +apt +hornet +immortals +##dou +asturias +cleansing +dario +##rries +##anta +etymology +servicing +zhejiang +##venor +##nx +horned +erasmus +rayon +relocating +£10 +##bags +escalated +promenade +stubble +2010s +artisans +axial +liquids +mora +sho +yoo +##tsky +bundles +oldies +##nally +notification +bastion +##ths +sparkle +##lved +1728 +leash +pathogen +highs +##hmi +immature +880 +gonzaga +ignatius +mansions +monterrey +sweets +bryson +##loe +polled +regatta +brightest +pei +rosy +squid +hatfield +payroll +addict +meath +cornerback +heaviest +lodging +##mage +capcom +rippled +##sily +barnet +mayhem +ymca +snuggled +rousseau +##cute +blanchard +284 +fragmented +leighton +chromosomes +risking +##md +##strel +##utter +corinne +coyotes +cynical +hiroshi +yeomanry +##ractive +ebook +grading +mandela +plume +agustin +magdalene +##rkin +bea +femme +trafford +##coll +##lun +##tance +52nd +fourier +upton +##mental +camilla +gust +iihf +islamabad +longevity +##kala +feldman +netting +##rization +endeavour +foraging +mfa +orr +##open +greyish +contradiction +graz +##ruff +handicapped +marlene +tweed +oaxaca +spp +campos +miocene +pri +configured +cooks +pluto +cozy +pornographic +##entes +70th +fairness +glided +jonny +lynne +rounding +sired +##emon +##nist +remade +uncover +##mack +complied +lei +newsweek +##jured +##parts +##enting +##pg +293 +finer +guerrillas +athenian +deng +disused +stepmother +accuse +gingerly +seduction +521 +confronting +##walker +##going +gora +nostalgia +sabres +virginity +wrenched +##minated +syndication +wielding +eyre +##56 +##gnon +##igny +behaved +taxpayer +sweeps +##growth +childless +gallant +##ywood +amplified +geraldine +scrape +##ffi +babylonian +fresco +##rdan +##kney +##position +1718 +restricting +tack +fukuoka +osborn +selector +partnering +##dlow +318 +gnu +kia +tak +whitley +gables +##54 +##mania +mri +softness +immersion +##bots +##evsky +1713 +chilling +insignificant +pcs +##uis +elites +lina +purported +supplemental +teaming +##americana +##dding +##inton +proficient +rouen +##nage +##rret +niccolo +selects +##bread +fluffy +1621 +gruff +knotted +mukherjee +polgara +thrash +nicholls +secluded +smoothing +thru +corsica +loaf +whitaker +inquiries +##rrier +##kam +indochina +289 +marlins +myles +peking +##tea +extracts +pastry +superhuman +connacht +vogel +##ditional +##het +##udged +##lash +gloss +quarries +refit +teaser +##alic +##gaon +20s +materialized +sling +camped +pickering +tung +tracker +pursuant +##cide +cranes +soc +##cini +##typical +##viere +anhalt +overboard +workout +chores +fares +orphaned +stains +##logie +fenton +surpassing +joyah +triggers +##itte +grandmaster +##lass +##lists +clapping +fraudulent +ledger +nagasaki +##cor +##nosis +##tsa +eucalyptus +tun +##icio +##rney +##tara +dax +heroism +ina +wrexham +onboard +unsigned +##dates +moshe +galley +winnie +droplets +exiles +praises +watered +noodles +##aia +fein +adi +leland +multicultural +stink +bingo +comets +erskine +modernized +canned +constraint +domestically 
+chemotherapy +featherweight +stifled +##mum +darkly +irresistible +refreshing +hasty +isolate +##oys +kitchener +planners +##wehr +cages +yarn +implant +toulon +elects +childbirth +yue +##lind +##lone +cn +rightful +sportsman +junctions +remodeled +specifies +##rgh +291 +##oons +complimented +##urgent +lister +ot +##logic +bequeathed +cheekbones +fontana +gabby +##dial +amadeus +corrugated +maverick +resented +triangles +##hered +##usly +nazareth +tyrol +1675 +assent +poorer +sectional +aegean +##cous +296 +nylon +ghanaian +##egorical +##weig +cushions +forbid +fusiliers +obstruction +somerville +##scia +dime +earrings +elliptical +leyte +oder +polymers +timmy +atm +midtown +piloted +settles +continual +externally +mayfield +##uh +enrichment +henson +keane +persians +1733 +benji +braden +pep +324 +##efe +contenders +pepsi +valet +##isches +298 +##asse +##earing +goofy +stroll +##amen +authoritarian +occurrences +adversary +ahmedabad +tangent +toppled +dorchester +1672 +modernism +marxism +islamist +charlemagne +exponential +racks +unicode +brunette +mbc +pic +skirmish +##bund +##lad +##powered +##yst +hoisted +messina +shatter +##ctum +jedi +vantage +##music +##neil +clemens +mahmoud +corrupted +authentication +lowry +nils +##washed +omnibus +wounding +jillian +##itors +##opped +serialized +narcotics +handheld +##arm +##plicity +intersecting +stimulating +##onis +crate +fellowships +hemingway +casinos +climatic +fordham +copeland +drip +beatty +leaflets +robber +brothel +madeira +##hedral +sphinx +ultrasound +##vana +valor +forbade +leonid +villas +##aldo +duane +marquez +##cytes +disadvantaged +forearms +kawasaki +reacts +consular +lax +uncles +uphold +##hopper +concepcion +dorsey +lass +##izan +arching +passageway +1708 +researches +tia +internationals +##graphs +##opers +distinguishes +javanese +divert +##uven +plotted +##listic +##rwin +##erik +##tify +affirmative +signifies +validation +##bson +kari +felicity +georgina +zulu +##eros +##rained +##rath +overcoming +##dot +argyll +##rbin +1734 +chiba +ratification +windy +earls +parapet +##marks +hunan +pristine +astrid +punta +##gart +brodie +##kota +##oder +malaga +minerva +rouse +##phonic +bellowed +pagoda +portals +reclamation +##gur +##odies +##⁄₄ +parentheses +quoting +allergic +palette +showcases +benefactor +heartland +nonlinear +##tness +bladed +cheerfully +scans +##ety +##hone +1666 +girlfriends +pedersen +hiram +sous +##liche +##nator +1683 +##nery +##orio +##umen +bobo +primaries +smiley +##cb +unearthed +uniformly +fis +metadata +1635 +ind +##oted +recoil +##titles +##tura +##ια +406 +hilbert +jamestown +mcmillan +tulane +seychelles +##frid +antics +coli +fated +stucco +##grants +1654 +bulky +accolades +arrays +caledonian +carnage +optimism +puebla +##tative +##cave +enforcing +rotherham +seo +dunlop +aeronautics +chimed +incline +zoning +archduke +hellenistic +##oses +##sions +candi +thong +##ople +magnate +rustic +##rsk +projective +slant +##offs +danes +hollis +vocalists +##ammed +congenital +contend +gesellschaft +##ocating +##pressive +douglass +quieter +##cm +##kshi +howled +salim +spontaneously +townsville +buena +southport +##bold +kato +1638 +faerie +stiffly +##vus +##rled +297 +flawless +realising +taboo +##7th +bytes +straightening +356 +jena +##hid +##rmin +cartwright +berber +bertram +soloists +411 +noses +417 +coping +fission +hardin +inca +##cen +1717 +mobilized +vhf +##raf +biscuits +curate +##85 +##anial +331 +gaunt +neighbourhoods +1540 +##abas +blanca +bypassed +sockets +behold +coincidentally +##bane 
+nara +shave +splinter +terrific +##arion +##erian +commonplace +juris +redwood +waistband +boxed +caitlin +fingerprints +jennie +naturalized +##ired +balfour +craters +jody +bungalow +hugely +quilt +glitter +pigeons +undertaker +bulging +constrained +goo +##sil +##akh +assimilation +reworked +##person +persuasion +##pants +felicia +##cliff +##ulent +1732 +explodes +##dun +##inium +##zic +lyman +vulture +hog +overlook +begs +northwards +ow +spoil +##urer +fatima +favorably +accumulate +sargent +sorority +corresponded +dispersal +kochi +toned +##imi +##lita +internacional +newfound +##agger +##lynn +##rigue +booths +peanuts +##eborg +medicare +muriel +nur +##uram +crates +millennia +pajamas +worsened +##breakers +jimi +vanuatu +yawned +##udeau +carousel +##hony +hurdle +##ccus +##mounted +##pod +rv +##eche +airship +ambiguity +compulsion +recapture +##claiming +arthritis +##osomal +1667 +asserting +ngc +sniffing +dade +discontent +glendale +ported +##amina +defamation +rammed +##scent +fling +livingstone +##fleet +875 +##ppy +apocalyptic +comrade +lcd +##lowe +cessna +eine +persecuted +subsistence +demi +hoop +reliefs +710 +coptic +progressing +stemmed +perpetrators +1665 +priestess +##nio +dobson +ebony +rooster +itf +tortricidae +##bbon +##jian +cleanup +##jean +##øy +1721 +eighties +taxonomic +holiness +##hearted +##spar +antilles +showcasing +stabilized +##nb +gia +mascara +michelangelo +dawned +##uria +##vinsky +extinguished +fitz +grotesque +£100 +##fera +##loid +##mous +barges +neue +throbbed +cipher +johnnie +##a1 +##mpt +outburst +##swick +spearheaded +administrations +c1 +heartbreak +pixels +pleasantly +##enay +lombardy +plush +##nsed +bobbie +##hly +reapers +tremor +xiang +minogue +substantive +hitch +barak +##wyl +kwan +##encia +910 +obscene +elegance +indus +surfer +bribery +conserve +##hyllum +##masters +horatio +##fat +apes +rebound +psychotic +##pour +iteration +##mium +##vani +botanic +horribly +antiques +dispose +paxton +##hli +##wg +timeless +1704 +disregard +engraver +hounds +##bau +##version +looted +uno +facilitates +groans +masjid +rutland +antibody +disqualification +decatur +footballers +quake +slacks +48th +rein +scribe +stabilize +commits +exemplary +tho +##hort +##chison +pantry +traversed +##hiti +disrepair +identifiable +vibrated +baccalaureate +##nnis +csa +interviewing +##iensis +##raße +greaves +wealthiest +343 +classed +jogged +£5 +##58 +##atal +illuminating +knicks +respecting +##uno +scrubbed +##iji +##dles +kruger +moods +growls +raider +silvia +chefs +kam +vr +cree +percival +##terol +gunter +counterattack +defiant +henan +ze +##rasia +##riety +equivalence +submissions +##fra +##thor +bautista +mechanically +##heater +cornice +herbal +templar +##mering +outputs +ruining +ligand +renumbered +extravagant +mika +blockbuster +eta +insurrection +##ilia +darkening +ferocious +pianos +strife +kinship +##aer +melee +##anor +##iste +##may +##oue +decidedly +weep +##jad +##missive +##ppel +354 +puget +unease +##gnant +1629 +hammering +kassel +ob +wessex +##lga +bromwich +egan +paranoia +utilization +##atable +##idad +contradictory +provoke +##ols +##ouring +##tangled +knesset +##very +##lette +plumbing +##sden +##¹ +greensboro +occult +sniff +338 +zev +beaming +gamer +haggard +mahal +##olt +##pins +mendes +utmost +briefing +gunnery +##gut +##pher +##zh +##rok +1679 +khalifa +sonya +##boot +principals +urbana +wiring +##liffe +##minating +##rrado +dahl +nyu +skepticism +np +townspeople +ithaca +lobster +somethin +##fur +##arina +##−1 +freighter +zimmerman +biceps 
+contractual +##herton +amend +hurrying +subconscious +##anal +336 +meng +clermont +spawning +##eia +##lub +dignitaries +impetus +snacks +spotting +twigs +##bilis +##cz +##ouk +libertadores +nic +skylar +##aina +##firm +gustave +asean +##anum +dieter +legislatures +flirt +bromley +trolls +umar +##bbies +##tyle +blah +parc +bridgeport +crank +negligence +##nction +46th +constantin +molded +bandages +seriousness +00pm +siegel +carpets +compartments +upbeat +statehood +##dner +##edging +marko +730 +platt +##hane +paving +##iy +1738 +abbess +impatience +limousine +nbl +##talk +441 +lucille +mojo +nightfall +robbers +##nais +karel +brisk +calves +replicate +ascribed +telescopes +##olf +intimidated +##reen +ballast +specialization +##sit +aerodynamic +caliphate +rainer +visionary +##arded +epsilon +##aday +##onte +aggregation +auditory +boosted +reunification +kathmandu +loco +robyn +402 +acknowledges +appointing +humanoid +newell +redeveloped +restraints +##tained +barbarians +chopper +1609 +italiana +##lez +##lho +investigates +wrestlemania +##anies +##bib +690 +##falls +creaked +dragoons +gravely +minions +stupidity +volley +##harat +##week +musik +##eries +##uously +fungal +massimo +semantics +malvern +##ahl +##pee +discourage +embryo +imperialism +1910s +profoundly +##ddled +jiangsu +sparkled +stat +##holz +sweatshirt +tobin +##iction +sneered +##cheon +##oit +brit +causal +smyth +##neuve +diffuse +perrin +silvio +##ipes +##recht +detonated +iqbal +selma +##nism +##zumi +roasted +##riders +tay +##ados +##mament +##mut +##rud +840 +completes +nipples +cfa +flavour +hirsch +##laus +calderon +sneakers +moravian +##ksha +1622 +rq +294 +##imeters +bodo +##isance +##pre +##ronia +anatomical +excerpt +##lke +dh +kunst +##tablished +##scoe +biomass +panted +unharmed +gael +housemates +montpellier +##59 +coa +rodents +tonic +hickory +singleton +##taro +451 +1719 +aldo +breaststroke +dempsey +och +rocco +##cuit +merton +dissemination +midsummer +serials +##idi +haji +polynomials +##rdon +gs +enoch +prematurely +shutter +taunton +£3 +##grating +##inates +archangel +harassed +##asco +326 +archway +dazzling +##ecin +1736 +sumo +wat +##kovich +1086 +honneur +##ently +##nostic +##ttal +##idon +1605 +403 +1716 +blogger +rents +##gnan +hires +##ikh +##dant +howie +##rons +handler +retracted +shocks +1632 +arun +duluth +kepler +trumpeter +##lary +peeking +seasoned +trooper +##mara +laszlo +##iciencies +##rti +heterosexual +##inatory +##ssion +indira +jogging +##inga +##lism +beit +dissatisfaction +malice +##ately +nedra +peeling +##rgeon +47th +stadiums +475 +vertigo +##ains +iced +restroom +##plify +##tub +illustrating +pear +##chner +##sibility +inorganic +rappers +receipts +watery +##kura +lucinda +##oulos +reintroduced +##8th +##tched +gracefully +saxons +nutritional +wastewater +rained +favourites +bedrock +fisted +hallways +likeness +upscale +##lateral +1580 +blinds +prequel +##pps +##tama +deter +humiliating +restraining +tn +vents +1659 +laundering +recess +rosary +tractors +coulter +federer +##ifiers +##plin +persistence +##quitable +geschichte +pendulum +quakers +##beam +bassett +pictorial +buffet +koln +##sitor +drills +reciprocal +shooters +##57 +##cton +##tees +converge +pip +dmitri +donnelly +yamamoto +aqua +azores +demographics +hypnotic +spitfire +suspend +wryly +roderick +##rran +sebastien +##asurable +mavericks +##fles +##200 +himalayan +prodigy +##iance +transvaal +demonstrators +handcuffs +dodged +mcnamara +sublime +1726 +crazed +##efined +##till +ivo +pondered +reconciled +shrill +sava 
+##duk +bal +cad +heresy +jaipur +goran +##nished +341 +lux +shelly +whitehall +##hre +israelis +peacekeeping +##wled +1703 +demetrius +ousted +##arians +##zos +beale +anwar +backstroke +raged +shrinking +cremated +##yck +benign +towing +wadi +darmstadt +landfill +parana +soothe +colleen +sidewalks +mayfair +tumble +hepatitis +ferrer +superstructure +##gingly +##urse +##wee +anthropological +translators +##mies +closeness +hooves +##pw +mondays +##roll +##vita +landscaping +##urized +purification +sock +thorns +thwarted +jalan +tiberius +##taka +saline +##rito +confidently +khyber +sculptors +##ij +brahms +hammersmith +inspectors +battista +fivb +fragmentation +hackney +##uls +arresting +exercising +antoinette +bedfordshire +##zily +dyed +##hema +1656 +racetrack +variability +##tique +1655 +austrians +deteriorating +madman +theorists +aix +lehman +weathered +1731 +decreed +eruptions +1729 +flaw +quinlan +sorbonne +flutes +nunez +1711 +adored +downwards +fable +rasped +1712 +moritz +mouthful +renegade +shivers +stunts +dysfunction +restrain +translit +327 +pancakes +##avio +##cision +##tray +351 +vial +##lden +bain +##maid +##oxide +chihuahua +malacca +vimes +##rba +##rnier +1664 +donnie +plaques +##ually +337 +bangs +floppy +huntsville +loretta +nikolay +##otte +eater +handgun +ubiquitous +##hett +eras +zodiac +1634 +##omorphic +1820s +##zog +cochran +##bula +##lithic +warring +##rada +dalai +excused +blazers +mcconnell +reeling +bot +este +##abi +geese +hoax +taxon +##bla +guitarists +##icon +condemning +hunts +inversion +moffat +taekwondo +##lvis +1624 +stammered +##rest +##rzy +sousa +fundraiser +marylebone +navigable +uptown +cabbage +daniela +salman +shitty +whimper +##kian +##utive +programmers +protections +rm +##rmi +##rued +forceful +##enes +fuss +##tao +##wash +brat +oppressive +reykjavik +spartak +ticking +##inkles +##kiewicz +adolph +horst +maui +protege +straighten +cpc +landau +concourse +clements +resultant +##ando +imaginative +joo +reactivated +##rem +##ffled +##uising +consultative +##guide +flop +kaitlyn +mergers +parenting +somber +##vron +supervise +vidhan +##imum +courtship +exemplified +harmonies +medallist +refining +##rrow +##ка +amara +##hum +780 +goalscorer +sited +overshadowed +rohan +displeasure +secretive +multiplied +osman +##orth +engravings +padre +##kali +##veda +miniatures +mis +##yala +clap +pali +rook +##cana +1692 +57th +antennae +astro +oskar +1628 +bulldog +crotch +hackett +yucatan +##sure +amplifiers +brno +ferrara +migrating +##gree +thanking +turing +##eza +mccann +ting +andersson +onslaught +gaines +ganga +incense +standardization +##mation +sentai +scuba +stuffing +turquoise +waivers +alloys +##vitt +regaining +vaults +##clops +##gizing +digger +furry +memorabilia +probing +##iad +payton +rec +deutschland +filippo +opaque +seamen +zenith +afrikaans +##filtration +disciplined +inspirational +##merie +banco +confuse +grafton +tod +##dgets +championed +simi +anomaly +biplane +##ceptive +electrode +##para +1697 +cleavage +crossbow +swirl +informant +##lars +##osta +afi +bonfire +spec +##oux +lakeside +slump +##culus +##lais +##qvist +##rrigan +1016 +facades +borg +inwardly +cervical +xl +pointedly +050 +stabilization +##odon +chests +1699 +hacked +ctv +orthogonal +suzy +##lastic +gaulle +jacobite +rearview +##cam +##erted +ashby +##drik +##igate +##mise +##zbek +affectionately +canine +disperse +latham +##istles +##ivar +spielberg +##orin +##idium +ezekiel +cid +##sg +durga +middletown +##cina +customized +frontiers +harden +##etano +##zzy +1604 
+bolsheviks +##66 +coloration +yoko +##bedo +briefs +slabs +debra +liquidation +plumage +##oin +blossoms +dementia +subsidy +1611 +proctor +relational +jerseys +parochial +ter +##ici +esa +peshawar +cavalier +loren +cpi +idiots +shamrock +1646 +dutton +malabar +mustache +##endez +##ocytes +referencing +terminates +marche +yarmouth +##sop +acton +mated +seton +subtly +baptised +beige +extremes +jolted +kristina +telecast +##actic +safeguard +waldo +##baldi +##bular +endeavors +sloppy +subterranean +##ensburg +##itung +delicately +pigment +tq +##scu +1626 +##ound +collisions +coveted +herds +##personal +##meister +##nberger +chopra +##ricting +abnormalities +defective +galician +lucie +##dilly +alligator +likened +##genase +burundi +clears +complexion +derelict +deafening +diablo +fingered +champaign +dogg +enlist +isotope +labeling +mrna +##erre +brilliance +marvelous +##ayo +1652 +crawley +ether +footed +dwellers +deserts +hamish +rubs +warlock +skimmed +##lizer +870 +buick +embark +heraldic +irregularities +##ajan +kiara +##kulam +##ieg +antigen +kowalski +##lge +oakley +visitation +##mbit +vt +##suit +1570 +murderers +##miento +##rites +chimneys +##sling +condemn +custer +exchequer +havre +##ghi +fluctuations +##rations +dfb +hendricks +vaccines +##tarian +nietzsche +biking +juicy +##duced +brooding +scrolling +selangor +##ragan +352 +annum +boomed +seminole +sugarcane +##dna +departmental +dismissing +innsbruck +arteries +ashok +batavia +daze +kun +overtook +##rga +##tlan +beheaded +gaddafi +holm +electronically +faulty +galilee +fractures +kobayashi +##lized +gunmen +magma +aramaic +mala +eastenders +inference +messengers +bf +##qu +407 +bathrooms +##vere +1658 +flashbacks +ideally +misunderstood +##jali +##weather +mendez +##grounds +505 +uncanny +##iii +1709 +friendships +##nbc +sacrament +accommodated +reiterated +logistical +pebbles +thumped +##escence +administering +decrees +drafts +##flight +##cased +##tula +futuristic +picket +intimidation +winthrop +##fahan +interfered +339 +afar +francoise +morally +uta +cochin +croft +dwarfs +##bruck +##dents +##nami +biker +##hner +##meral +nano +##isen +##ometric +##pres +##ан +brightened +meek +parcels +securely +gunners +##jhl +##zko +agile +hysteria +##lten +##rcus +bukit +champs +chevy +cuckoo +leith +sadler +theologians +welded +##section +1663 +jj +plurality +xander +##rooms +##formed +shredded +temps +intimately +pau +tormented +##lok +##stellar +1618 +charred +ems +essen +##mmel +alarms +spraying +ascot +blooms +twinkle +##abia +##apes +internment +obsidian +##chaft +snoop +##dav +##ooping +malibu +##tension +quiver +##itia +hays +mcintosh +travers +walsall +##ffie +1623 +beverley +schwarz +plunging +structurally +m3 +rosenthal +vikram +##tsk +770 +ghz +##onda +##tiv +chalmers +groningen +pew +reckon +unicef +##rvis +55th +##gni +1651 +sulawesi +avila +cai +metaphysical +screwing +turbulence +##mberg +augusto +samba +56th +baffled +momentary +toxin +##urian +##wani +aachen +condoms +dali +steppe +##3d +##app +##oed +##year +adolescence +dauphin +electrically +inaccessible +microscopy +nikita +##ega +atv +##cel +##enter +##oles +##oteric +##ы +accountants +punishments +wrongly +bribes +adventurous +clinch +flinders +southland +##hem +##kata +gough +##ciency +lads +soared +##ה +undergoes +deformation +outlawed +rubbish +##arus +##mussen +##nidae +##rzburg +arcs +##ingdon +##tituted +1695 +wheelbase +wheeling +bombardier +campground +zebra +##lices +##oj +##bain +lullaby +##ecure +donetsk +wylie +grenada +##arding +##ης +squinting 
+eireann +opposes +##andra +maximal +runes +##broken +##cuting +##iface +##ror +##rosis +additive +britney +adultery +triggering +##drome +detrimental +aarhus +containment +jc +swapped +vichy +##ioms +madly +##oric +##rag +brant +##ckey +##trix +1560 +1612 +broughton +rustling +##stems +##uder +asbestos +mentoring +##nivorous +finley +leaps +##isan +apical +pry +slits +substitutes +##dict +intuitive +fantasia +insistent +unreasonable +##igen +##vna +domed +hannover +margot +ponder +##zziness +impromptu +jian +lc +rampage +stemming +##eft +andrey +gerais +whichever +amnesia +appropriated +anzac +clicks +modifying +ultimatum +cambrian +maids +verve +yellowstone +##mbs +conservatoire +##scribe +adherence +dinners +spectra +imperfect +mysteriously +sidekick +tatar +tuba +##aks +##ifolia +distrust +##athan +##zle +c2 +ronin +zac +##pse +celaena +instrumentalist +scents +skopje +##mbling +comical +compensated +vidal +condor +intersect +jingle +wavelengths +##urrent +mcqueen +##izzly +carp +weasel +422 +kanye +militias +postdoctoral +eugen +gunslinger +##ɛ +faux +hospice +##for +appalled +derivation +dwarves +##elis +dilapidated +##folk +astoria +philology +##lwyn +##otho +##saka +inducing +philanthropy +##bf +##itative +geek +markedly +sql +##yce +bessie +indices +rn +##flict +495 +frowns +resolving +weightlifting +tugs +cleric +contentious +1653 +mania +rms +##miya +##reate +##ruck +##tucket +bien +eels +marek +##ayton +##cence +discreet +unofficially +##ife +leaks +##bber +1705 +332 +dung +compressor +hillsborough +pandit +shillings +distal +##skin +381 +##tat +##you +nosed +##nir +mangrove +undeveloped +##idia +textures +##inho +##500 +##rise +ae +irritating +nay +amazingly +bancroft +apologetic +compassionate +kata +symphonies +##lovic +airspace +##lch +930 +gifford +precautions +fulfillment +sevilla +vulgar +martinique +##urities +looting +piccolo +tidy +##dermott +quadrant +armchair +incomes +mathematicians +stampede +nilsson +##inking +##scan +foo +quarterfinal +##ostal +shang +shouldered +squirrels +##owe +344 +vinegar +##bner +##rchy +##systems +delaying +##trics +ars +dwyer +rhapsody +sponsoring +##gration +bipolar +cinder +starters +##olio +##urst +421 +signage +##nty +aground +figurative +mons +acquaintances +duets +erroneously +soyuz +elliptic +recreated +##cultural +##quette +##ssed +##tma +##zcz +moderator +scares +##itaire +##stones +##udence +juniper +sighting +##just +##nsen +britten +calabria +ry +bop +cramer +forsyth +stillness +##л +airmen +gathers +unfit +##umber +##upt +taunting +##rip +seeker +streamlined +##bution +holster +schumann +tread +vox +##gano +##onzo +strive +dil +reforming +covent +newbury +predicting +##orro +decorate +tre +##puted +andover +ie +asahi +dept +dunkirk +gills +##tori +buren +huskies +##stis +##stov +abstracts +bets +loosen +##opa +1682 +yearning +##glio +##sir +berman +effortlessly +enamel +napoli +persist +##peration +##uez +attache +elisa +b1 +invitations +##kic +accelerating +reindeer +boardwalk +clutches +nelly +polka +starbucks +##kei +adamant +huey +lough +unbroken +adventurer +embroidery +inspecting +stanza +##ducted +naia +taluka +##pone +##roids +chases +deprivation +florian +##jing +##ppet +earthly +##lib +##ssee +colossal +foreigner +vet +freaks +patrice +rosewood +triassic +upstate +##pkins +dominates +ata +chants +ks +vo +##400 +##bley +##raya +##rmed +555 +agra +infiltrate +##ailing +##ilation +##tzer +##uppe +##werk +binoculars +enthusiast +fujian +squeak +##avs +abolitionist +almeida +boredom +hampstead +marsden +rations +##ands 
+inflated +334 +bonuses +rosalie +patna +##rco +329 +detachments +penitentiary +54th +flourishing +woolf +##dion +##etched +papyrus +##lster +##nsor +##toy +bobbed +dismounted +endelle +inhuman +motorola +tbs +wince +wreath +##ticus +hideout +inspections +sanjay +disgrace +infused +pudding +stalks +##urbed +arsenic +leases +##hyl +##rrard +collarbone +##waite +##wil +dowry +##bant +##edance +genealogical +nitrate +salamanca +scandals +thyroid +necessitated +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##` +##{ +##| +##} +##~ +##¡ +##¢ +##£ +##¤ +##¥ +##¦ +##§ +##¨ +##© +##ª +##« +##¬ +##® +##± +##´ +##µ +##¶ +##· +##º +##» +##¼ +##¾ +##¿ +##æ +##ð +##÷ +##þ +##đ +##ħ +##ŋ +##œ +##ƒ +##ɐ +##ɑ +##ɒ +##ɔ +##ɕ +##ə +##ɡ +##ɣ +##ɨ +##ɪ +##ɫ +##ɬ +##ɯ +##ɲ +##ɴ +##ɹ +##ɾ +##ʀ +##ʁ +##ʂ +##ʃ +##ʉ +##ʊ +##ʋ +##ʌ +##ʎ +##ʐ +##ʑ +##ʒ +##ʔ +##ʰ +##ʲ +##ʳ +##ʷ +##ʸ +##ʻ +##ʼ +##ʾ +##ʿ +##ˈ +##ˡ +##ˢ +##ˣ +##ˤ +##β +##γ +##δ +##ε +##ζ +##θ +##κ +##λ +##μ +##ξ +##ο +##π +##ρ +##σ +##τ +##υ +##φ +##χ +##ψ +##ω +##б +##г +##д +##ж +##з +##м +##п +##с +##у +##ф +##х +##ц +##ч +##ш +##щ +##ъ +##э +##ю +##ђ +##є +##і +##ј +##љ +##њ +##ћ +##ӏ +##ա +##բ +##գ +##դ +##ե +##թ +##ի +##լ +##կ +##հ +##մ +##յ +##ն +##ո +##պ +##ս +##վ +##տ +##ր +##ւ +##ք +##־ +##א +##ב +##ג +##ד +##ו +##ז +##ח +##ט +##י +##ך +##כ +##ל +##ם +##מ +##ן +##נ +##ס +##ע +##ף +##פ +##ץ +##צ +##ק +##ר +##ש +##ת +##، +##ء +##ب +##ت +##ث +##ج +##ح +##خ +##ذ +##ز +##س +##ش +##ص +##ض +##ط +##ظ +##ع +##غ +##ـ +##ف +##ق +##ك +##و +##ى +##ٹ +##پ +##چ +##ک +##گ +##ں +##ھ +##ہ +##ے +##अ +##आ +##उ +##ए +##क +##ख +##ग +##च +##ज +##ट +##ड +##ण +##त +##थ +##द +##ध +##न +##प +##ब +##भ +##म +##य +##र +##ल +##व +##श +##ष +##स +##ह +##ा +##ि +##ी +##ो +##। +##॥ +##ং +##অ +##আ +##ই +##উ +##এ +##ও +##ক +##খ +##গ +##চ +##ছ +##জ +##ট +##ড +##ণ +##ত +##থ +##দ +##ধ +##ন +##প +##ব +##ভ +##ম +##য +##র +##ল +##শ +##ষ +##স +##হ +##া +##ি +##ী +##ে +##க +##ச +##ட +##த +##ந +##ன +##ப +##ம +##ய +##ர +##ல +##ள +##வ +##ா +##ி +##ு +##ே +##ை +##ನ +##ರ +##ಾ +##ක +##ය +##ර +##ල +##ව +##ා +##ก +##ง +##ต +##ท +##น +##พ +##ม +##ย +##ร +##ล +##ว +##ส +##อ +##า +##เ +##་ +##། +##ག +##ང +##ད +##ན +##པ +##བ +##མ +##འ +##ར +##ལ +##ས +##မ +##ა +##ბ +##გ +##დ +##ე +##ვ +##თ +##ი +##კ +##ლ +##მ +##ნ +##ო +##რ +##ს +##ტ +##უ +##ᄀ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄉ +##ᄊ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅡ +##ᅢ +##ᅥ +##ᅦ +##ᅧ +##ᅩ +##ᅪ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᅵ +##ᆨ +##ᆫ +##ᆯ +##ᆷ +##ᆸ +##ᆼ +##ᴬ +##ᴮ +##ᴰ +##ᴵ +##ᴺ +##ᵀ +##ᵃ +##ᵇ +##ᵈ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵖ +##ᵗ +##ᵘ +##ᵣ +##ᵤ +##ᵥ +##ᶜ +##ᶠ +##‐ +##‑ +##‒ +##– +##— +##― +##‖ +##‘ +##’ +##‚ +##“ +##” +##„ +##† +##‡ +##• +##… +##‰ +##′ +##″ +##› +##‿ +##⁄ +##⁰ +##ⁱ +##⁴ +##⁵ +##⁶ +##⁷ +##⁸ +##⁹ +##⁻ +##ⁿ +##₅ +##₆ +##₇ +##₈ +##₉ +##₊ +##₍ +##₎ +##ₐ +##ₑ +##ₒ +##ₓ +##ₕ +##ₖ +##ₗ +##ₘ +##ₚ +##ₛ +##ₜ +##₤ +##₩ +##€ +##₱ +##₹ +##ℓ +##№ +##ℝ +##™ +##⅓ +##⅔ +##← +##↑ +##→ +##↓ +##↔ +##↦ +##⇄ +##⇌ +##⇒ +##∂ +##∅ +##∆ +##∇ +##∈ +##∗ +##∘ +##√ +##∞ +##∧ +##∨ +##∩ +##∪ +##≈ +##≡ +##≤ +##≥ +##⊂ +##⊆ +##⊕ +##⊗ +##⋅ +##─ +##│ +##■ +##▪ +##● +##★ +##☆ +##☉ +##♠ +##♣ +##♥ +##♦ +##♯ +##⟨ +##⟩ +##ⱼ +##⺩ +##⺼ +##⽥ +##、 +##。 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##〜 +##あ +##い +##う +##え +##お +##か +##き +##く +##け +##こ +##さ +##し +##す +##せ +##そ +##た +##ち +##っ +##つ +##て +##と +##な +##に +##ぬ +##ね +##の +##は +##ひ +##ふ +##へ +##ほ +##ま +##み +##む +##め +##も +##や +##ゆ +##よ +##ら +##り +##る +##れ +##ろ +##を +##ん +##ァ +##ア +##ィ +##イ +##ウ +##ェ +##エ +##オ +##カ +##キ +##ク +##ケ 
+##コ +##サ +##シ +##ス +##セ +##タ +##チ +##ッ +##ツ +##テ +##ト +##ナ +##ニ +##ノ +##ハ +##ヒ +##フ +##ヘ +##ホ +##マ +##ミ +##ム +##メ +##モ +##ャ +##ュ +##ョ +##ラ +##リ +##ル +##レ +##ロ +##ワ +##ン +##・ +##ー +##一 +##三 +##上 +##下 +##不 +##世 +##中 +##主 +##久 +##之 +##也 +##事 +##二 +##五 +##井 +##京 +##人 +##亻 +##仁 +##介 +##代 +##仮 +##伊 +##会 +##佐 +##侍 +##保 +##信 +##健 +##元 +##光 +##八 +##公 +##内 +##出 +##分 +##前 +##劉 +##力 +##加 +##勝 +##北 +##区 +##十 +##千 +##南 +##博 +##原 +##口 +##古 +##史 +##司 +##合 +##吉 +##同 +##名 +##和 +##囗 +##四 +##国 +##國 +##土 +##地 +##坂 +##城 +##堂 +##場 +##士 +##夏 +##外 +##大 +##天 +##太 +##夫 +##奈 +##女 +##子 +##学 +##宀 +##宇 +##安 +##宗 +##定 +##宣 +##宮 +##家 +##宿 +##寺 +##將 +##小 +##尚 +##山 +##岡 +##島 +##崎 +##川 +##州 +##巿 +##帝 +##平 +##年 +##幸 +##广 +##弘 +##張 +##彳 +##後 +##御 +##德 +##心 +##忄 +##志 +##忠 +##愛 +##成 +##我 +##戦 +##戸 +##手 +##扌 +##政 +##文 +##新 +##方 +##日 +##明 +##星 +##春 +##昭 +##智 +##曲 +##書 +##月 +##有 +##朝 +##木 +##本 +##李 +##村 +##東 +##松 +##林 +##森 +##楊 +##樹 +##橋 +##歌 +##止 +##正 +##武 +##比 +##氏 +##民 +##水 +##氵 +##氷 +##永 +##江 +##沢 +##河 +##治 +##法 +##海 +##清 +##漢 +##瀬 +##火 +##版 +##犬 +##王 +##生 +##田 +##男 +##疒 +##発 +##白 +##的 +##皇 +##目 +##相 +##省 +##真 +##石 +##示 +##社 +##神 +##福 +##禾 +##秀 +##秋 +##空 +##立 +##章 +##竹 +##糹 +##美 +##義 +##耳 +##良 +##艹 +##花 +##英 +##華 +##葉 +##藤 +##行 +##街 +##西 +##見 +##訁 +##語 +##谷 +##貝 +##貴 +##車 +##軍 +##辶 +##道 +##郎 +##郡 +##部 +##都 +##里 +##野 +##金 +##鈴 +##镇 +##長 +##門 +##間 +##阝 +##阿 +##陳 +##陽 +##雄 +##青 +##面 +##風 +##食 +##香 +##馬 +##高 +##龍 +##龸 +##fi +##fl +##! +##( +##) +##, +##- +##. +##/ +##: +##? +##~ diff --git a/yc2_univl/backup/pdvc/modules/cross-base/cross_config.json b/yc2_univl/backup/pdvc/modules/cross-base/cross_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a4807695d56a3aea97a55a9db97ba753e960748 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/cross-base/cross_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 1024, + "num_attention_heads": 12, + "num_hidden_layers": 2, + "vocab_size": 768 +} \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/modules/decoder-base/decoder_config.json b/yc2_univl/backup/pdvc/modules/decoder-base/decoder_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91c46b63eba081afb28085a6d53f390ada5a5cfe --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/decoder-base/decoder_config.json @@ -0,0 +1,14 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522, + "num_decoder_layers": 1, + "max_target_embeddings": 512 +} diff --git a/yc2_univl/backup/pdvc/modules/file_utils.py b/yc2_univl/backup/pdvc/modules/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..43fa8ca87e20ee5333dd84a09795a743bbf3f183 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/file_utils.py @@ -0,0 +1,239 @@ +""" +Utilities for working with the local dataset cache. +This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp +Copyright by the AllenNLP authors. 
+""" + +import os +import logging +import shutil +import tempfile +import json +from urllib.parse import urlparse +from pathlib import Path +from typing import Optional, Tuple, Union, IO, Callable, Set +from hashlib import sha256 +from functools import wraps + +from tqdm import tqdm + +import boto3 +from botocore.exceptions import ClientError +import requests + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', + Path.home() / '.pytorch_pretrained_bert')) + + +def url_to_filename(url: str, etag: str = None) -> str: + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the url's, delimited + by a period. + """ + url_bytes = url.encode('utf-8') + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode('utf-8') + etag_hash = sha256(etag_bytes) + filename += '.' + etag_hash.hexdigest() + + return filename + + +def filename_to_url(filename: str, cache_dir: Union[str, Path] = None) -> Tuple[str, str]: + """ + Return the url and etag (which may be ``None``) stored for `filename`. + Raise ``FileNotFoundError`` if `filename` or its stored metadata do not exist. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + cache_path = os.path.join(cache_dir, filename) + if not os.path.exists(cache_path): + raise FileNotFoundError("file {} not found".format(cache_path)) + + meta_path = cache_path + '.json' + if not os.path.exists(meta_path): + raise FileNotFoundError("file {} not found".format(meta_path)) + + with open(meta_path) as meta_file: + metadata = json.load(meta_file) + url = metadata['url'] + etag = metadata['etag'] + + return url, etag + + +def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str: + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. + raise FileNotFoundError("file {} not found".format(url_or_filename)) + else: + # Something unknown + raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + + +def split_s3_path(url: str) -> Tuple[str, str]: + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + + +def s3_request(func: Callable): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. 
+ """ + + @wraps(func) + def wrapper(url: str, *args, **kwargs): + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise FileNotFoundError("file {} not found".format(url)) + else: + raise + + return wrapper + + +@s3_request +def s3_etag(url: str) -> Optional[str]: + """Check ETag on S3 object.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + + +@s3_request +def s3_get(url: str, temp_file: IO) -> None: + """Pull a file directly from S3.""" + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + + +def http_get(url: str, temp_file: IO) -> None: + req = requests.get(url, stream=True) + content_length = req.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + + +def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str: + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + os.makedirs(cache_dir, exist_ok=True) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + raise IOError("HEAD request failed for url {} with status code {}" + .format(url, response.status_code)) + etag = response.headers.get("ETag") + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. + with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, 'wb') as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {'url': url, 'etag': etag} + meta_path = cache_path + '.json' + with open(meta_path, 'w') as meta_file: + json.dump(meta, meta_file) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + + +def read_set_from_file(filename: str) -> Set[str]: + ''' + Extract a de-duped collection (set) of text from a file. + Expected file format is one item per line. 
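+    (for example, a stop-word list or a vocabulary file that stores one token per line)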
+    '''
+    collection = set()
+    with open(filename, 'r', encoding='utf-8') as file_:
+        for line in file_:
+            collection.add(line.rstrip())
+    return collection
+
+
+def get_file_extension(path: str, dot=True, lower: bool = True):
+    ext = os.path.splitext(path)[1]
+    ext = ext if dot else ext[1:]
+    return ext.lower() if lower else ext
diff --git a/yc2_univl/backup/pdvc/modules/modeling.py b/yc2_univl/backup/pdvc/modules/modeling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9551b488c16d04fad65dcdaeba7d73d7740f2902
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/modeling.py
@@ -0,0 +1,429 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn import CrossEntropyLoss, MSELoss
+
+from pdvc.modules.until_module import PreTrainedModel, LayerNorm, CrossEn, MILNCELoss, MaxMarginRankingLoss
+from pdvc.modules.module_bert import BertModel, BertConfig, BertOnlyMLMHead
+from pdvc.modules.module_visual import VisualModel, VisualConfig, VisualOnlyMLMHead
+from pdvc.modules.module_cross import CrossModel, CrossConfig
+from pdvc.modules.module_decoder import DecoderModel, DecoderConfig
+
+logger = logging.getLogger(__name__)
+
+
+class UniVLPreTrainedModel(PreTrainedModel, nn.Module):
+    """ An abstract class to handle weights initialization and
+        a simple interface for downloading and loading pretrained models.
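+        Concrete subclasses populate self.bert, self.visual, self.cross and
+        self.decoder, which are all initialized to None here.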
+ """ + def __init__(self, bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs): + # utilize bert config as base config + super(UniVLPreTrainedModel, self).__init__(bert_config) + self.bert_config = bert_config + self.visual_config = visual_config + self.cross_config = cross_config + self.decoder_config = decoder_config + + self.bert = None + self.visual = None + self.cross = None + self.decoder = None + + @classmethod + def from_pretrained(cls, pretrained_bert_name, visual_model_name, cross_model_name, decoder_model_name, + state_dict=None, cache_dir=None, type_vocab_size=2, *inputs, **kwargs): + + task_config = None + if "task_config" in kwargs.keys(): + task_config = kwargs["task_config"] + if not hasattr(task_config, "local_rank"): + task_config.__dict__["local_rank"] = 0 + elif task_config.local_rank == -1: + task_config.local_rank = 0 + print(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config) + bert_config, state_dict = BertConfig.get_config(pretrained_bert_name, cache_dir, type_vocab_size, state_dict, task_config=task_config) + visual_config, _ = VisualConfig.get_config(visual_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + cross_config, _ = CrossConfig.get_config(cross_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + decoder_config, _ = DecoderConfig.get_config(decoder_model_name, cache_dir, type_vocab_size, state_dict=None, task_config=task_config) + + model = cls(bert_config, visual_config, cross_config, decoder_config, *inputs, **kwargs) + + assert model.bert is not None + assert model.visual is not None + + if state_dict is not None: + model = cls.init_preweight(model, state_dict, task_config=task_config) + + return model + +class NormalizeVideo(nn.Module): + def __init__(self, task_config): + super(NormalizeVideo, self).__init__() + self.visual_norm2d = LayerNorm(task_config.video_dim) + + def forward(self, video): + video = torch.as_tensor(video).float() + video = video.view(-1, video.shape[-2], video.shape[-1]) + video = self.visual_norm2d(video) + return video + +def show_log(task_config, info): + if task_config is None or task_config.local_rank == 0: + logger.warning(info) + +def update_attr(target_name, target_config, target_attr_name, source_config, source_attr_name, default_value=None): + if hasattr(source_config, source_attr_name): + if default_value is None or getattr(source_config, source_attr_name) != default_value: + setattr(target_config, target_attr_name, getattr(source_config, source_attr_name)) + show_log(source_config, "Set {}.{}: {}.".format(target_name, + target_attr_name, getattr(target_config, target_attr_name))) + return target_config + +def check_attr(target_name, task_config): + return hasattr(task_config, target_name) and task_config.__dict__[target_name] + +class UniVL(UniVLPreTrainedModel): + def __init__(self, bert_config, visual_config, cross_config, decoder_config, task_config): + super(UniVL, self).__init__(bert_config, visual_config, cross_config, decoder_config) + self.task_config = task_config + self.ignore_video_index = -1 + + assert self.task_config.max_words <= bert_config.max_position_embeddings + assert self.task_config.max_words <= decoder_config.max_target_embeddings + assert self.task_config.max_frames <= visual_config.max_position_embeddings + assert self.task_config.max_words + self.task_config.max_frames <= cross_config.max_position_embeddings + + self._stage_one = True + self._stage_two = False + + if 
check_attr('stage_two', self.task_config): + self._stage_one = False + self._stage_two = self.task_config.stage_two + show_log(task_config, "Stage-One:{}, Stage-Two:{}".format(self._stage_one, self._stage_two)) + + self.train_sim_after_cross = False + if self._stage_one and check_attr('train_sim_after_cross', self.task_config): + self.train_sim_after_cross = True + show_log(task_config, "Test retrieval after cross encoder.") + + # Text Encoder ===> + bert_config = update_attr("bert_config", bert_config, "num_hidden_layers", + self.task_config, "text_num_hidden_layers") + # print('=================The bert config:==========/n',bert_config) + # print('=================The task config:==========/n',self.task_config) + self.bert = BertModel(bert_config) + bert_word_embeddings_weight = self.bert.embeddings.word_embeddings.weight + bert_position_embeddings_weight = self.bert.embeddings.position_embeddings.weight + # <=== End of Text Encoder + + # Video Encoder ===> + visual_config = update_attr("visual_config", visual_config, "num_hidden_layers", + self.task_config, "visual_num_hidden_layers") + self.visual = VisualModel(visual_config) + visual_word_embeddings_weight = self.visual.embeddings.word_embeddings.weight + # <=== End of Video Encoder + + if self._stage_one is False or self.train_sim_after_cross: + # Cross Encoder ===> + cross_config = update_attr("cross_config", cross_config, "num_hidden_layers", + self.task_config, "cross_num_hidden_layers") + self.cross = CrossModel(cross_config) + # <=== End of Cross Encoder + + if self.train_sim_after_cross is False: + # Decoder ===> + decoder_config = update_attr("decoder_config", decoder_config, "num_decoder_layers", + self.task_config, "decoder_num_hidden_layers") + self.decoder = DecoderModel(decoder_config, bert_word_embeddings_weight, bert_position_embeddings_weight) + # <=== End of Decoder + + if self.task_config.do_pretrain: + self.cls = BertOnlyMLMHead(bert_config, bert_word_embeddings_weight) + self.cls_visual = VisualOnlyMLMHead(visual_config, visual_word_embeddings_weight) + self.alm_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.similarity_dense = nn.Linear(bert_config.hidden_size, 1) + self.decoder_loss_fct = CrossEntropyLoss(ignore_index=-1) + + self.normalize_video = NormalizeVideo(task_config) + + mILNCELoss = MILNCELoss(batch_size=task_config.batch_size // task_config.n_gpu, n_pair=task_config.n_pair, ) + maxMarginRankingLoss = MaxMarginRankingLoss(margin=task_config.margin, + negative_weighting=task_config.negative_weighting, + batch_size=task_config.batch_size // task_config.n_gpu, + n_pair=task_config.n_pair, + hard_negative_rate=task_config.hard_negative_rate, ) + + if task_config.use_mil: + self.loss_fct = CrossEn() if self._stage_two else mILNCELoss + self._pretrain_sim_loss_fct = mILNCELoss + else: + self.loss_fct = CrossEn() if self._stage_two else maxMarginRankingLoss + self._pretrain_sim_loss_fct = maxMarginRankingLoss + + self.apply(self.init_weights) + + def forward(self, input_ids, token_type_ids, attention_mask, video, video_mask=None, + pairs_masked_text=None, pairs_token_labels=None, masked_video=None, video_labels_index=None, + input_caption_ids=None, decoder_mask=None, output_caption_ids=None): + + input_ids = input_ids.view(-1, input_ids.shape[-1]) + token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + video = self.normalize_video(video) + + if input_caption_ids is 
not None: + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + sequence_output, visual_output = self.get_sequence_visual_output(input_ids, token_type_ids, attention_mask, + video, video_mask, shaped=True) + + if self.training: + loss = 0. + if self._stage_one: + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, + video_mask, shaped=True) + sim_loss = self.loss_fct(sim_matrix) + loss += sim_loss + + if self._stage_two: + if self.task_config.do_pretrain: + pairs_masked_text = pairs_masked_text.view(-1, pairs_masked_text.shape[-1]) + pairs_token_labels = pairs_token_labels.view(-1, pairs_token_labels.shape[-1]) + + masked_video = self.normalize_video(masked_video) + video_labels_index = video_labels_index.view(-1, video_labels_index.shape[-1]) + + sequence_output_alm, visual_output_alm = self.get_sequence_visual_output(pairs_masked_text, token_type_ids, + attention_mask, masked_video, video_mask, shaped=True) + + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output_alm, visual_output_alm, attention_mask, video_mask) + sequence_cross_output, visual_cross_output = torch.split(cross_output, [attention_mask.size(-1), video_mask.size(-1)], dim=1) + + alm_loss = self._calculate_mlm_loss(sequence_cross_output, pairs_token_labels) + loss += alm_loss + + nce_loss = self._calculate_mfm_loss(visual_cross_output, video, video_mask, video_labels_index) + loss += nce_loss + + sim_matrix = self.get_similarity_logits(sequence_output, visual_output, attention_mask, video_mask, + shaped=True, _pretrain_joint=True) + sim_loss_joint = self._pretrain_sim_loss_fct(sim_matrix) + loss += sim_loss_joint + + if (input_caption_ids is not None) and \ + (self.task_config.do_pretrain + or (self.task_config.do_pretrain is False and self.task_config.task_type == "caption")): + if self.task_config.do_pretrain: + decoder_scores, res_tuples = self._get_decoder_score(sequence_output_alm, visual_output_alm, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + elif self.task_config.task_type == "caption": + decoder_scores, res_tuples = self._get_decoder_score(sequence_output, visual_output, + input_ids, attention_mask, video_mask, + input_caption_ids, decoder_mask, shaped=True) + else: + raise NotImplementedError + + output_caption_ids = output_caption_ids.view(-1, output_caption_ids.shape[-1]) + decoder_loss = self.decoder_loss_fct(decoder_scores.view(-1, self.bert_config.vocab_size), output_caption_ids.view(-1)) + loss += decoder_loss + + if self.task_config.do_pretrain or self.task_config.task_type == "retrieval": + if self.task_config.do_pretrain: + sim_matrix_text_visual = self.get_similarity_logits(sequence_output_alm, visual_output_alm, + attention_mask, video_mask, shaped=True) + elif self.task_config.task_type == "retrieval": + sim_matrix_text_visual = self.get_similarity_logits(sequence_output, visual_output, + attention_mask, video_mask, shaped=True) + else: + raise NotImplementedError + + sim_loss_text_visual = self.loss_fct(sim_matrix_text_visual) + loss += sim_loss_text_visual + + return loss + else: + return None + + def _calculate_mlm_loss(self, sequence_output_alm, pairs_token_labels): + alm_scores = self.cls(sequence_output_alm) + alm_loss = self.alm_loss_fct(alm_scores.view(-1, self.bert_config.vocab_size), pairs_token_labels.view(-1)) + return alm_loss + + def _calculate_mfm_loss(self, visual_output_alm, video, 
video_mask, video_labels_index):
+        afm_scores = self.cls_visual(visual_output_alm)
+        afm_scores_tr = afm_scores.view(-1, afm_scores.shape[-1])
+
+        video_tr = video.permute(2, 0, 1)
+        video_tr = video_tr.view(video_tr.shape[0], -1)
+
+        logits_matrix = torch.mm(afm_scores_tr, video_tr)
+        video_mask_float = video_mask.to(dtype=torch.float)
+        mask_matrix = torch.mm(video_mask_float.view(-1, 1), video_mask_float.view(1, -1))
+        masked_logits = logits_matrix + (1. - mask_matrix) * -1e8
+
+        logpt = F.log_softmax(masked_logits, dim=-1)
+        logpt = torch.diag(logpt)
+        nce_loss = -logpt
+
+        video_labels_index_mask = (video_labels_index != self.ignore_video_index)
+        nce_loss = nce_loss.masked_select(video_labels_index_mask.view(-1))
+        nce_loss = nce_loss.mean()
+        return nce_loss
+
+    def get_sequence_visual_output(self, input_ids, token_type_ids, attention_mask, video, video_mask, shaped=False):
+        if shaped is False:
+            input_ids = input_ids.view(-1, input_ids.shape[-1])
+            token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1])
+            attention_mask = attention_mask.view(-1, attention_mask.shape[-1])
+            video_mask = video_mask.view(-1, video_mask.shape[-1])
+            video = self.normalize_video(video)
+        encoded_layers, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=True)
+        sequence_output = encoded_layers[-1]
+
+        visual_layers, _ = self.visual(video, video_mask, output_all_encoded_layers=True)
+        visual_output = visual_layers[-1]
+
+        return sequence_output, visual_output
+
+    def _get_cross_output(self, sequence_output, visual_output, attention_mask, video_mask):
+        concat_features = torch.cat((sequence_output, visual_output), dim=1)  # concatenate tokens and frames
+        concat_mask = torch.cat((attention_mask, video_mask), dim=1)
+        text_type_ = torch.zeros_like(attention_mask)
+        video_type_ = torch.ones_like(video_mask)
+        concat_type = torch.cat((text_type_, video_type_), dim=1)
+
+        cross_layers, pooled_output = self.cross(concat_features, concat_type, concat_mask, output_all_encoded_layers=True)
+        cross_output = cross_layers[-1]
+
+        return cross_output, pooled_output, concat_mask
+
+    def _mean_pooling_for_similarity(self, sequence_output, visual_output, attention_mask, video_mask,):
+        attention_mask_un = attention_mask.to(dtype=torch.float).unsqueeze(-1)
+        attention_mask_un[:, 0, :] = 0.
+        sequence_output = sequence_output * attention_mask_un
+        text_out = torch.sum(sequence_output, dim=1) / torch.sum(attention_mask_un, dim=1, dtype=torch.float)
+
+        video_mask_un = video_mask.to(dtype=torch.float).unsqueeze(-1)
+        visual_output = visual_output * video_mask_un
+        video_mask_un_sum = torch.sum(video_mask_un, dim=1, dtype=torch.float)
+        video_mask_un_sum[video_mask_un_sum == 0.] = 1.
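+        # (all-zero mask sums were replaced with 1 above, so this division is safe)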
+ video_out = torch.sum(visual_output, dim=1) / video_mask_un_sum + + return text_out, video_out + + def _cross_similarity(self, sequence_output, visual_output, attention_mask, video_mask): + b_text, s_text, h_text = sequence_output.size() + b_visual, s_visual, h_visual = visual_output.size() + + retrieve_logits_list = [] + step_size = 5 + + split_size = [step_size] * (b_text // step_size) + release_size = b_text - sum(split_size) + if release_size > 0: + split_size += [release_size] + + sequence_output_splits = torch.split(sequence_output, split_size, dim=0) + attention_mask_splits = torch.split(attention_mask, split_size, dim=0) + for i in range(len(split_size)): + sequence_output_row = sequence_output_splits[i] + attention_mask_row = attention_mask_splits[i] + sequence_output_l = sequence_output_row.unsqueeze(1).repeat(1, b_visual, 1, 1) + sequence_output_l = sequence_output_l.view(-1, s_text, h_text) + attention_mask_l = attention_mask_row.unsqueeze(1).repeat(1, b_visual, 1) + attention_mask_l = attention_mask_l.view(-1, s_text) + + step_truth = sequence_output_row.size(0) + visual_output_r = visual_output.unsqueeze(0).repeat(step_truth, 1, 1, 1) + visual_output_r = visual_output_r.view(-1, s_visual, h_visual) + video_mask_r = video_mask.unsqueeze(0).repeat(step_truth, 1, 1) + video_mask_r = video_mask_r.view(-1, s_visual) + + cross_output, pooled_output, concat_mask = \ + self._get_cross_output(sequence_output_l, visual_output_r, attention_mask_l, video_mask_r) + retrieve_logits_row = self.similarity_dense(pooled_output).squeeze(-1).view(step_truth, b_visual) + + retrieve_logits_list.append(retrieve_logits_row) + retrieve_logits = torch.cat(retrieve_logits_list, dim=0) + return retrieve_logits + + def get_similarity_logits(self, sequence_output, visual_output, attention_mask, video_mask, shaped=False, _pretrain_joint=False): + if shaped is False: + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + if (self._stage_two and _pretrain_joint is False) or self.train_sim_after_cross: + retrieve_logits = self._cross_similarity(sequence_output, visual_output, attention_mask, video_mask) + else: + text_out, video_out = self._mean_pooling_for_similarity(sequence_output, visual_output, attention_mask, video_mask) + if self.task_config.use_mil is False: + text_out = F.normalize(text_out, dim=-1) + video_out = F.normalize(video_out, dim=-1) + retrieve_logits = torch.matmul(text_out, video_out.t()) + + return retrieve_logits + + def _get_decoder_score(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, shaped=False): + + if shaped is False: + input_ids = input_ids.view(-1, input_ids.shape[-1]) + attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) + video_mask = video_mask.view(-1, video_mask.shape[-1]) + + input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1]) + decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1]) + + res_tuples = () + cross_output, pooled_output, concat_mask = self._get_cross_output(sequence_output, visual_output, attention_mask, video_mask) + decoder_scores = self.decoder(input_caption_ids, encoder_outs=cross_output, answer_mask=decoder_mask, encoder_mask=concat_mask) + + return decoder_scores, res_tuples + + def decoder_caption(self, sequence_output, visual_output, input_ids, attention_mask, video_mask, input_caption_ids, decoder_mask, + shaped=False, get_logits=False): + if shaped is False: + input_ids 
= input_ids.view(-1, input_ids.shape[-1])
+            attention_mask = attention_mask.view(-1, attention_mask.shape[-1])
+            video_mask = video_mask.view(-1, video_mask.shape[-1])
+
+            input_caption_ids = input_caption_ids.view(-1, input_caption_ids.shape[-1])
+            decoder_mask = decoder_mask.view(-1, decoder_mask.shape[-1])
+
+        decoder_scores, _ = self._get_decoder_score(sequence_output, visual_output,
+                                                    input_ids, attention_mask, video_mask,
+                                                    input_caption_ids, decoder_mask, shaped=True)
+
+        if get_logits:
+            return decoder_scores
+
+        _, decoder_scores_result = torch.max(decoder_scores, -1)
+
+        return decoder_scores_result
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/modules/module_bert.py b/yc2_univl/backup/pdvc/modules/module_bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa376657fdf271f11978379665a67897c2cc5943
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/module_bert.py
@@ -0,0 +1,447 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import copy
+import json
+import math
+import logging
+import tarfile
+import tempfile
+import shutil
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from .file_utils import cached_path
+from .until_config import PretrainedConfig
+from .until_module import PreTrainedModel, LayerNorm, ACT2FN
+
+logger = logging.getLogger(__name__)
+
+PRETRAINED_MODEL_ARCHIVE_MAP = {
+    'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz",
+    'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz",
+    'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz",
+    'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz",
+    'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz",
+    'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz",
+    'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz",
+}
+
+CONFIG_NAME = 'bert_config.json'
+WEIGHTS_NAME = 'pytorch_model.bin'
+
+
+class BertConfig(PretrainedConfig):
+    """Configuration class to store the configuration of a `BertModel`.
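+    Accepts either a vocabulary size (int) or the path to a JSON config file (str)
+    as its first argument.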
+    """
+    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    config_name = CONFIG_NAME
+    weights_name = WEIGHTS_NAME
+
+    def __init__(self,
+                 vocab_size_or_config_json_file,
+                 hidden_size=768,
+                 num_hidden_layers=12,
+                 num_attention_heads=12,
+                 intermediate_size=3072,
+                 hidden_act="gelu",
+                 hidden_dropout_prob=0.1,
+                 attention_probs_dropout_prob=0.1,
+                 max_position_embeddings=512,
+                 type_vocab_size=2,
+                 initializer_range=0.02):
+        """Constructs BertConfig.
+
+        Args:
+            vocab_size_or_config_json_file: Vocabulary size of `input_ids` in `BertModel`.
+            hidden_size: Size of the encoder layers and the pooler layer.
+            num_hidden_layers: Number of hidden layers in the Transformer encoder.
+            num_attention_heads: Number of attention heads for each attention layer in
+                the Transformer encoder.
+            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
+                layer in the Transformer encoder.
+            hidden_act: The non-linear activation function (function or string) in the
+                encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
+            hidden_dropout_prob: The dropout probability for all fully connected
+                layers in the embeddings, encoder, and pooler.
+            attention_probs_dropout_prob: The dropout ratio for the attention
+                probabilities.
+            max_position_embeddings: The maximum sequence length that this model might
+                ever be used with. Typically set this to something large just in case
+                (e.g., 512 or 1024 or 2048).
+            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+                `BertModel`.
+            initializer_range: The stddev of the truncated_normal_initializer for
+                initializing all weight matrices.
+        """
+        if isinstance(vocab_size_or_config_json_file, str):
+            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
+                json_config = json.loads(reader.read())
+            for key, value in json_config.items():
+                self.__dict__[key] = value
+        elif isinstance(vocab_size_or_config_json_file, int):
+            self.vocab_size = vocab_size_or_config_json_file
+            self.hidden_size = hidden_size
+            self.num_hidden_layers = num_hidden_layers
+            self.num_attention_heads = num_attention_heads
+            self.hidden_act = hidden_act
+            self.intermediate_size = intermediate_size
+            self.hidden_dropout_prob = hidden_dropout_prob
+            self.attention_probs_dropout_prob = attention_probs_dropout_prob
+            self.max_position_embeddings = max_position_embeddings
+            self.type_vocab_size = type_vocab_size
+            self.initializer_range = initializer_range
+        else:
+            raise ValueError("First argument must be either a vocabulary size (int)"
+                             " or the path to a pretrained model config file (str)")
+
+class BertEmbeddings(nn.Module):
+    """Construct the embeddings from word, position and token_type embeddings.
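+    The three embeddings are summed, then LayerNorm and dropout are applied.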
+    """
+    def __init__(self, config):
+        super(BertEmbeddings, self).__init__()
+        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
+        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
+        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
+
+        # self.LayerNorm is not snake-cased to stick with the TensorFlow model variable name and be able to load
+        # any TensorFlow checkpoint file
+        self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+
+    def forward(self, input_ids, token_type_ids=None):
+        seq_length = input_ids.size(1)
+        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
+        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
+        if token_type_ids is None:
+            token_type_ids = torch.zeros_like(input_ids)
+
+        words_embeddings = self.word_embeddings(input_ids)
+        position_embeddings = self.position_embeddings(position_ids)
+        token_type_embeddings = self.token_type_embeddings(token_type_ids)
+
+        embeddings = words_embeddings + position_embeddings + token_type_embeddings
+        embeddings = self.LayerNorm(embeddings)
+        embeddings = self.dropout(embeddings)
+        return embeddings
+
+
+class BertSelfAttention(nn.Module):
+    def __init__(self, config):
+        super(BertSelfAttention, self).__init__()
+        if config.hidden_size % config.num_attention_heads != 0:
+            raise ValueError(
+                "The hidden size (%d) is not a multiple of the number of attention "
+                "heads (%d)" % (config.hidden_size, config.num_attention_heads))
+        self.num_attention_heads = config.num_attention_heads
+        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+        self.query = nn.Linear(config.hidden_size, self.all_head_size)
+        self.key = nn.Linear(config.hidden_size, self.all_head_size)
+        self.value = nn.Linear(config.hidden_size, self.all_head_size)
+
+        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+
+    def transpose_for_scores(self, x):
+        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
+        x = x.view(*new_x_shape)
+        return x.permute(0, 2, 1, 3)
+
+    def forward(self, hidden_states, attention_mask):
+        mixed_query_layer = self.query(hidden_states)
+        mixed_key_layer = self.key(hidden_states)
+        mixed_value_layer = self.value(hidden_states)
+
+        query_layer = self.transpose_for_scores(mixed_query_layer)
+        key_layer = self.transpose_for_scores(mixed_key_layer)
+        value_layer = self.transpose_for_scores(mixed_value_layer)
+
+        # Take the dot product between "query" and "key" to get the raw attention scores.
+        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
+        # Apply the attention mask (precomputed for all layers in the BertModel forward() function).
+        attention_scores = attention_scores + attention_mask
+
+        # Normalize the attention scores to probabilities.
+        attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+        # This is actually dropping out entire tokens to attend to, which might
+        # seem a bit unusual, but is taken from the original Transformer paper.
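+        # (here dropout can zero out an entire key position's weight for a given
+        # query, rather than dropping individual feature dimensions)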
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + layer = BertLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class BertPooler(nn.Module): + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, 
hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + +class BertModel(PreTrainedModel): + """BERT model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to
+            a `sentence B` token (see BERT paper for more details).
+        `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices
+            selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
+            input sequence length in the current batch. It's the mask that we typically use for attention when
+            a batch has varying length sentences.
+        `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`.
+
+    Outputs: Tuple of (encoded_layers, pooled_output)
+        `encoded_layers`: controlled by the `output_all_encoded_layers` argument:
+            - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end
+                of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each
+                encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size],
+            - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding
+                to the last attention block of shape [batch_size, sequence_length, hidden_size],
+        `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
+            classifier pretrained on top of the hidden state associated with the first token of the
+            input (`CLS`) to train on the Next-Sentence task (see the BERT paper).
+
+    Example usage:
+    ```python
+    # Already been converted into WordPiece token ids
+    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
+    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
+    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
+
+    config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
+        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)
+
+    model = modeling.BertModel(config=config)
+    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
+    ```
+    """
+    def __init__(self, config):
+        super(BertModel, self).__init__(config)
+        self.embeddings = BertEmbeddings(config)
+        self.encoder = BertEncoder(config)
+        self.pooler = BertPooler(config)
+        self.apply(self.init_weights)
+
+    def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True):
+
+        if attention_mask is None:
+            attention_mask = torch.ones_like(input_ids)
+        if token_type_ids is None:
+            token_type_ids = torch.zeros_like(input_ids)
+
+        # We create a 3D attention mask from a 2D tensor mask.
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
+        # this attention mask is more simple than the triangular masking of causal attention
+        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
+        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
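+        # A tiny worked example (hypothetical values): attention_mask = [[1, 1, 0]]
+        # becomes an extended mask of shape [1, 1, 1, 3] holding [[[[0., 0., -10000.]]]],
+        # so after the softmax the padded third position gets essentially zero weight.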
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/modules/module_cross.py b/yc2_univl/backup/pdvc/modules/module_cross.py new file mode 100644 index 0000000000000000000000000000000000000000..8ff41910a2c62e1c79ab3f843bef3c54171bb026 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/module_cross.py @@ -0,0 +1,394 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'cross_config.json' +WEIGHTS_NAME = 'cross_pytorch_model.bin' + + +class CrossConfig(PretrainedConfig): + """Configuration class to store the configuration of a `CrossModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02): + """Constructs CrossConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CrossModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+        attention_probs_dropout_prob: The dropout ratio for the attention
+            probabilities.
+        max_position_embeddings: The maximum sequence length that this model might
+            ever be used with. Typically set this to something large just in case
+            (e.g., 512 or 1024 or 2048).
+        type_vocab_size: The vocabulary size of the `token_type_ids` passed into
+            `CrossModel`.
+        initializer_range: The stddev of the truncated_normal_initializer for
+            initializing all weight matrices.
+        """
+        if isinstance(vocab_size_or_config_json_file, str):
+            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
+                json_config = json.loads(reader.read())
+            for key, value in json_config.items():
+                self.__dict__[key] = value
+        elif isinstance(vocab_size_or_config_json_file, int):
+            self.vocab_size = vocab_size_or_config_json_file
+            self.hidden_size = hidden_size
+            self.num_hidden_layers = num_hidden_layers
+            self.num_attention_heads = num_attention_heads
+            self.hidden_act = hidden_act
+            self.intermediate_size = intermediate_size
+            self.hidden_dropout_prob = hidden_dropout_prob
+            self.attention_probs_dropout_prob = attention_probs_dropout_prob
+            self.max_position_embeddings = max_position_embeddings
+            self.type_vocab_size = type_vocab_size
+            self.initializer_range = initializer_range
+        else:
+            raise ValueError("First argument must be either a vocabulary size (int) "
+                             "or the path to a pretrained model config file (str)")
+
+
+class CrossEmbeddings(nn.Module):
+    """Construct the embeddings from the concatenated input, position and token_type embeddings.
+    """
+    def __init__(self, config):
+        super(CrossEmbeddings, self).__init__()
+
+        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
+        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
+
+        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
+        # any TensorFlow checkpoint file
+        self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+
+    def forward(self, concat_embeddings, concat_type=None):
+
+        batch_size, seq_length = concat_embeddings.size(0), concat_embeddings.size(1)
+        if concat_type is None:
+            # Default to a single token type; torch.zeros needs the sequence length here
+            # (the original passed `concat_type`, i.e. None, as the size argument) and the
+            # ids must be long-typed for the embedding lookup.
+            concat_type = torch.zeros(batch_size, seq_length, dtype=torch.long, device=concat_embeddings.device)
+
+        position_ids = torch.arange(seq_length, dtype=torch.long, device=concat_embeddings.device)
+        position_ids = position_ids.unsqueeze(0).expand(concat_embeddings.size(0), -1)
+
+        token_type_embeddings = self.token_type_embeddings(concat_type)
+        position_embeddings = self.position_embeddings(position_ids)
+
+        embeddings = concat_embeddings + position_embeddings + token_type_embeddings
+        embeddings = self.LayerNorm(embeddings)
+        embeddings = self.dropout(embeddings)
+        return embeddings
+
+class CrossSelfAttention(nn.Module):
+    def __init__(self, config):
+        super(CrossSelfAttention, self).__init__()
+        if config.hidden_size % config.num_attention_heads != 0:
+            raise ValueError(
+                "The hidden size (%d) is not a multiple of the number of attention "
+                "heads (%d)" % (config.hidden_size, config.num_attention_heads))
+        self.num_attention_heads = config.num_attention_heads
+        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+        self.query = nn.Linear(config.hidden_size, self.all_head_size)
+        self.key = nn.Linear(config.hidden_size, self.all_head_size)
+        self.value = nn.Linear(config.hidden_size, self.all_head_size)
+
+        self.dropout
= nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in CrossModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class CrossSelfOutput(nn.Module): + def __init__(self, config): + super(CrossSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossAttention(nn.Module): + def __init__(self, config): + super(CrossAttention, self).__init__() + self.self = CrossSelfAttention(config) + self.output = CrossSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class CrossIntermediate(nn.Module): + def __init__(self, config): + super(CrossIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class CrossOutput(nn.Module): + def __init__(self, config): + super(CrossOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class CrossLayer(nn.Module): + def __init__(self, config): 
+ super(CrossLayer, self).__init__() + self.attention = CrossAttention(config) + self.intermediate = CrossIntermediate(config) + self.output = CrossOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class CrossEncoder(nn.Module): + def __init__(self, config): + super(CrossEncoder, self).__init__() + layer = CrossLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class CrossPooler(nn.Module): + def __init__(self, config): + super(CrossPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class CrossPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(CrossPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class CrossLMPredictionHead(nn.Module): + def __init__(self, config, cross_model_embedding_weights): + super(CrossLMPredictionHead, self).__init__() + self.transform = CrossPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
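+        # For reference: the Linear below maps [hidden -> vocab] and shares its weight
+        # tensor with the embedding matrix of shape [vocab, hidden], so the logits are
+        # effectively hidden_states @ embedding_weights.T + bias (tied input/output embeddings).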
+        self.decoder = nn.Linear(cross_model_embedding_weights.size(1),
+                                 cross_model_embedding_weights.size(0),
+                                 bias=False)
+        self.decoder.weight = cross_model_embedding_weights
+        self.bias = nn.Parameter(torch.zeros(cross_model_embedding_weights.size(0)))
+
+    def forward(self, hidden_states):
+        hidden_states = self.transform(hidden_states)
+        hidden_states = self.decoder(hidden_states) + self.bias
+        return hidden_states
+
+
+class CrossOnlyMLMHead(nn.Module):
+    def __init__(self, config, cross_model_embedding_weights):
+        super(CrossOnlyMLMHead, self).__init__()
+        self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights)
+
+    def forward(self, sequence_output):
+        prediction_scores = self.predictions(sequence_output)
+        return prediction_scores
+
+
+class CrossOnlyNSPHead(nn.Module):
+    def __init__(self, config):
+        super(CrossOnlyNSPHead, self).__init__()
+        self.seq_relationship = nn.Linear(config.hidden_size, 2)
+
+    def forward(self, pooled_output):
+        seq_relationship_score = self.seq_relationship(pooled_output)
+        return seq_relationship_score
+
+
+class CrossPreTrainingHeads(nn.Module):
+    def __init__(self, config, cross_model_embedding_weights):
+        super(CrossPreTrainingHeads, self).__init__()
+        self.predictions = CrossLMPredictionHead(config, cross_model_embedding_weights)
+        self.seq_relationship = nn.Linear(config.hidden_size, 2)
+
+    def forward(self, sequence_output, pooled_output):
+        prediction_scores = self.predictions(sequence_output)
+        seq_relationship_score = self.seq_relationship(pooled_output)
+        return prediction_scores, seq_relationship_score
+
+
+class CrossModel(PreTrainedModel):
+    def __init__(self, config):
+        super(CrossModel, self).__init__(config)
+        self.embeddings = CrossEmbeddings(config)
+        self.encoder = CrossEncoder(config)
+        self.pooler = CrossPooler(config)
+        self.apply(self.init_weights)
+
+    def forward(self, concat_input, concat_type=None, attention_mask=None, output_all_encoded_layers=True):
+
+        if attention_mask is None:
+            # Build the default mask on the input's device; a bare torch.ones would
+            # allocate on CPU and fail for CUDA inputs.
+            attention_mask = torch.ones(concat_input.size(0), concat_input.size(1), device=concat_input.device)
+        if concat_type is None:
+            # token_type ids feed an nn.Embedding, so they must be integer-typed.
+            concat_type = torch.zeros_like(attention_mask, dtype=torch.long)
+
+        # We create a 3D attention mask from a 2D tensor mask.
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
+        # this attention mask is more simple than the triangular masking of causal attention
+        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
+        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
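+        # Broadcast sketch (restating the shapes above): the [batch, 1, 1, key_len] mask
+        # is added to attention scores of shape [batch, num_heads, query_len, key_len],
+        # so the same key positions are masked for every head and every query row.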
+ extended_attention_mask = extended_attention_mask.to(dtype=self.dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(concat_input, concat_type) + encoded_layers = self.encoder(embedding_output, + extended_attention_mask, + output_all_encoded_layers=output_all_encoded_layers) + sequence_output = encoded_layers[-1] + pooled_output = self.pooler(sequence_output) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + return encoded_layers, pooled_output diff --git a/yc2_univl/backup/pdvc/modules/module_decoder.py b/yc2_univl/backup/pdvc/modules/module_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..25622d1e4c0e9a0d19fe2b4986f7267ba1526823 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/module_decoder.py @@ -0,0 +1,406 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil +import numpy as np + +import torch +from torch import nn +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'decoder_config.json' +WEIGHTS_NAME = 'decoder_pytorch_model.bin' + + +class DecoderConfig(PretrainedConfig): + """Configuration class to store the configuration of a `DecoderModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + type_vocab_size=2, + initializer_range=0.02, + max_target_embeddings=128, + num_decoder_layers=1): + """Constructs DecoderConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `DecoderModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. 
+ attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `DecoderModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + max_target_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + num_decoder_layers: + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.max_target_embeddings = max_target_embeddings + self.num_decoder_layers = num_decoder_layers + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + +class BertIntermediate(nn.Module): + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + 
return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(decoder_model_embedding_weights.size(1), + decoder_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = decoder_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(decoder_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, decoder_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config, decoder_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + +class MultiHeadAttention(nn.Module): + ''' Multi-Head Attention module ''' + + def __init__(self, config): + super(MultiHeadAttention, self).__init__() + + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, q, k, v, attention_mask): + mixed_query_layer = self.query(q) + mixed_key_layer = self.key(k) + mixed_value_layer = self.value(v) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
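+        # Note: unlike the self-attention modules elsewhere in these files, this head
+        # also returns the masked, pre-softmax attention_scores (second return value
+        # below), which DecoderAttention exposes as per-layer attention weights.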
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + + return context_layer, attention_scores + +class PositionwiseFeedForward(nn.Module): + ''' A two-feed-forward-layer module ''' + + def __init__(self, d_in, d_hid, dropout=0.1): + super().__init__() + self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise + self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise + self.layer_norm = nn.LayerNorm(d_in) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + residual = x + output = x.transpose(1, 2) + output = self.w_2(ACT2FN["gelu"](self.w_1(output))) + output = output.transpose(1, 2) + output = self.dropout(output) + output = self.layer_norm(output + residual) + return output + +class DecoderAttention(nn.Module): + def __init__(self, config): + super(DecoderAttention, self).__init__() + self.att = MultiHeadAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, q, k, v, attention_mask): + att_output, attention_probs = self.att(q, k, v, attention_mask) + attention_output = self.output(att_output, q) + return attention_output, attention_probs + +class DecoderLayer(nn.Module): + def __init__(self, config): + super(DecoderLayer, self).__init__() + self.slf_attn = DecoderAttention(config) + self.enc_attn = DecoderAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None): + slf_output, _ = self.slf_attn(dec_input, dec_input, dec_input, slf_attn_mask) + dec_output, dec_att_scores = self.enc_attn(slf_output, enc_output, enc_output, dec_enc_attn_mask) + intermediate_output = self.intermediate(dec_output) + dec_output = self.output(intermediate_output, dec_output) + return dec_output, dec_att_scores + +class DecoderEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_target_embeddings, config.hidden_size) + self.word_embeddings.weight = decoder_word_embeddings_weight + self.position_embeddings.weight = decoder_position_embeddings_weight + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids): + seq_length = input_ids.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + + embeddings = words_embeddings + position_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class Decoder(nn.Module): + def __init__(self, config): + super(Decoder, self).__init__() + layer = DecoderLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_decoder_layers)]) + + def forward(self, hidden_states, encoder_outs, self_attn_mask, attention_mask, output_all_encoded_layers=False): + dec_att_scores = None + all_encoder_layers = [] + all_dec_att_probs = [] + for layer_module in self.layer: + hidden_states, dec_att_scores = layer_module(hidden_states, encoder_outs, self_attn_mask, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + all_dec_att_probs.append(dec_att_scores) + return all_encoder_layers, all_dec_att_probs + +class DecoderClassifier(nn.Module): + def __init__(self, config, embedding_weights): + super(DecoderClassifier, self).__init__() + self.cls = BertOnlyMLMHead(config, embedding_weights) + + def forward(self, hidden_states): + cls_scores = self.cls(hidden_states) + return cls_scores + +class DecoderModel(PreTrainedModel): + + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + final_norm (bool, optional): apply layer norm to the output of the + final decoder layer (default: True). 
+ """ + + def __init__(self, config, decoder_word_embeddings_weight, decoder_position_embeddings_weight): + super(DecoderModel, self).__init__(config) + self.config = config + self.max_target_length = config.max_target_embeddings + self.embeddings = DecoderEmbeddings(config, decoder_word_embeddings_weight, decoder_position_embeddings_weight) + self.decoder = Decoder(config) + self.classifier = DecoderClassifier(config, decoder_word_embeddings_weight) + self.apply(self.init_weights) + + def forward(self, input_ids, encoder_outs=None, answer_mask=None, encoder_mask=None): + """ + Args: + input_ids (LongTensor): previous decoder outputs of shape `(batch, tgt_len)`, for input feeding/teacher forcing + encoder_outs (Tensor, optional): output from the encoder, used for encoder-side attention + + Returns: + tuple: + - the last decoder layer's output of shape `(batch, tgt_len, vocab)` + - the last decoder layer's attention weights of shape `(batch, tgt_len, src_len)` + """ + embedding_output = self.embeddings(input_ids) + + extended_encoder_mask = encoder_mask.unsqueeze(1).unsqueeze(2) # b x 1 x 1 x ls + extended_encoder_mask = extended_encoder_mask.to(dtype=self.dtype) # fp16 compatibility + extended_encoder_mask = (1.0 - extended_encoder_mask) * -10000.0 + + extended_answer_mask = answer_mask.unsqueeze(1).unsqueeze(2) + extended_answer_mask = extended_answer_mask.to(dtype=self.dtype) # fp16 compatibility + + sz_b, len_s, _ = embedding_output.size() + subsequent_mask = torch.triu(torch.ones((len_s, len_s), device=embedding_output.device, dtype=embedding_output.dtype), diagonal=1) + self_attn_mask = subsequent_mask.unsqueeze(0).expand(sz_b, -1, -1).unsqueeze(1) # b x 1 x ls x ls + slf_attn_mask = ((1.0 - extended_answer_mask) + self_attn_mask).gt(0).to(dtype=self.dtype) + self_attn_mask = slf_attn_mask * -10000.0 + + decoded_layers, dec_att_scores = self.decoder(embedding_output, + encoder_outs, + self_attn_mask, + extended_encoder_mask, + ) + sequence_output = decoded_layers[-1] + cls_scores = self.classifier(sequence_output) + + return cls_scores diff --git a/yc2_univl/backup/pdvc/modules/module_visual.py b/yc2_univl/backup/pdvc/modules/module_visual.py new file mode 100644 index 0000000000000000000000000000000000000000..b9a43f8a74c1e5e020c8b4daec33d7adb5d3b840 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/module_visual.py @@ -0,0 +1,425 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import math +import logging +import tarfile +import tempfile +import shutil + +import torch +from torch import nn +import torch.nn.functional as F +from .file_utils import cached_path +from .until_config import PretrainedConfig +from .until_module import PreTrainedModel, LayerNorm, ACT2FN + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = {} +CONFIG_NAME = 'visual_config.json' +WEIGHTS_NAME = 'visual_pytorch_model.bin' + + +class VisualConfig(PretrainedConfig): + """Configuration class to store the configuration of a `VisualModel`. + """ + pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP + config_name = CONFIG_NAME + weights_name = WEIGHTS_NAME + def __init__(self, + vocab_size_or_config_json_file=4096, + hidden_size=768, + num_hidden_layers=3, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + initializer_range=0.02): + """Constructs VisualConfig. + + Args: + vocab_size_or_config_json_file: Size of the encoder layers and the pooler layer. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + """ + if isinstance(vocab_size_or_config_json_file, str): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.initializer_range = initializer_range + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + +class VisualEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + def __init__(self, config): + super(VisualEmbeddings, self).__init__() + + self.word_embeddings = nn.Linear(config.vocab_size, config.hidden_size) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_embeddings): + seq_length = input_embeddings.size(1) + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_embeddings.device) + position_ids = position_ids.unsqueeze(0).expand(input_embeddings.size(0), -1) + + words_embeddings = self.word_embeddings(input_embeddings) + # words_embeddings = self.transform_act_fn(words_embeddings) + + position_embeddings = self.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + +class VisualSelfAttention(nn.Module): + def __init__(self, config): + super(VisualSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in VisualModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class VisualSelfOutput(nn.Module): + def __init__(self, config): + super(VisualSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualAttention(nn.Module): + def __init__(self, config): + super(VisualAttention, self).__init__() + self.self = VisualSelfAttention(config) + self.output = VisualSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output + + +class VisualIntermediate(nn.Module): + def __init__(self, config): + super(VisualIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class VisualOutput(nn.Module): + def __init__(self, config): + super(VisualOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class VisualLayer(nn.Module): + def __init__(self, config): + super(VisualLayer, self).__init__() + self.attention = VisualAttention(config) + self.intermediate = VisualIntermediate(config) + self.output = VisualOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class VisualEncoder(nn.Module): + def __init__(self, config): + super(VisualEncoder, self).__init__() + layer = VisualLayer(config) + self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): + all_encoder_layers = [] + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + if output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + if not output_all_encoded_layers: + all_encoder_layers.append(hidden_states) + return all_encoder_layers + + +class VisualPooler(nn.Module): + def __init__(self, config): + super(VisualPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + 
self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class VisualPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(VisualPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.transform_act_fn = ACT2FN[config.hidden_act] \ + if isinstance(config.hidden_act, str) else config.hidden_act + self.LayerNorm = LayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class VisualLMPredictionHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualLMPredictionHead, self).__init__() + self.transform = VisualPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.weight = visual_model_embedding_weights + self.bias = nn.Parameter(torch.zeros(visual_model_embedding_weights.size(1))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = hidden_states.matmul(self.weight) + self.bias + return hidden_states + + +class VisualOnlyMLMHead(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualOnlyMLMHead, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class VisualOnlyNSPHead(nn.Module): + def __init__(self, config): + super(VisualOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class VisualPreTrainingHeads(nn.Module): + def __init__(self, config, visual_model_embedding_weights): + super(VisualPreTrainingHeads, self).__init__() + self.predictions = VisualLMPredictionHead(config, visual_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class VisualModel(PreTrainedModel): + """Visual model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a VisualConfig class instance with the configuration to build a new model + + Inputs: + `type`: a str, indicates which masking will be used in the attention, choice from [`bi`, `seq`, `gen`] + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. 
Type 0 corresponds to a `sentence A` and type 1 corresponds to
+            a `sentence B` token (see the BERT paper for more details).
+        `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices
+            selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
+            input sequence length in the current batch. It's the mask that we typically use for attention when
+            a batch has varying length sentences.
+        `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`.
+
+    Outputs: Tuple of (encoded_layers, pooled_output)
+        `encoded_layers`: controlled by the `output_all_encoded_layers` argument:
+            - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end
+                of each attention block (i.e. one per layer, `num_hidden_layers` in total), each
+                encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size],
+            - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding
+                to the last attention block of shape [batch_size, sequence_length, hidden_size],
+        `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a
+            classifier pretrained on top of the hidden state associated with the first token of the
+            input (`CLS`) to train on the Next-Sentence task (see the BERT paper).
+
+    Example usage:
+    ```python
+    # A batch of pre-extracted visual features and the corresponding mask
+    video = torch.randn(2, 3, 4096)
+    video_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
+
+    config = modeling.VisualConfig(vocab_size_or_config_json_file=4096, hidden_size=768,
+        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)
+
+    model = modeling.VisualModel(config=config)
+    all_encoder_layers, pooled_output = model(video, video_mask)
+    ```
+    """
+    def __init__(self, config):
+        super(VisualModel, self).__init__(config)
+        self.embeddings = VisualEmbeddings(config)
+        self.encoder = VisualEncoder(config)
+        self.pooler = VisualPooler(config)
+        self.apply(self.init_weights)
+
+    def forward(self, video, attention_mask=None, output_all_encoded_layers=True):
+
+        if attention_mask is None:
+            # Build the default mask on the video tensor's device so GPU runs don't mix devices.
+            attention_mask = torch.ones(video.size(0), video.size(1), device=video.device)
+
+        # We create a 3D attention mask from a 2D tensor mask.
+        # Sizes are [batch_size, 1, 1, to_seq_length]
+        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
+        # this attention mask is more simple than the triangular masking of causal attention
+        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
+        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+
+        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+        # masked positions, this operation will create a tensor which is 0.0 for
+        # positions we want to attend and -10000.0 for masked positions.
+        # Since we are adding it to the raw scores before the softmax, this is
+        # effectively the same as removing these entirely.
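+        # Why -10000.0 rather than -inf: a large finite value keeps fp16 arithmetic free
+        # of inf/NaN while still driving the masked positions' softmax weight to ~0.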
+        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)  # fp16 compatibility
+        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+
+        embedding_output = self.embeddings(video)
+        encoded_layers = self.encoder(embedding_output,
+                                      extended_attention_mask,
+                                      output_all_encoded_layers=output_all_encoded_layers)
+        sequence_output = encoded_layers[-1]
+        pooled_output = self.pooler(sequence_output)
+        if not output_all_encoded_layers:
+            encoded_layers = encoded_layers[-1]
+        return encoded_layers, pooled_output
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/modules/optimization.py b/yc2_univl/backup/pdvc/modules/optimization.py
new file mode 100644
index 0000000000000000000000000000000000000000..264c57c7d8f213004b4ee82a8861e0ae6103c906
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/optimization.py
@@ -0,0 +1,168 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch optimization for BERT model."""
+
+import math
+import torch
+from torch.optim import Optimizer
+from torch.optim.optimizer import required
+from torch.nn.utils import clip_grad_norm_
+import logging
+
+logger = logging.getLogger(__name__)
+
+def warmup_cosine(x, warmup=0.002):
+    if x < warmup:
+        return x/warmup
+    return 0.5 * (1.0 + math.cos(math.pi * x))
+
+def warmup_constant(x, warmup=0.002):
+    """ Linearly increases learning rate over `warmup`*`t_total` (as provided to BertAdam) training steps.
+        Learning rate is 1. afterwards. """
+    if x < warmup:
+        return x/warmup
+    return 1.0
+
+def warmup_linear(x, warmup=0.002):
+    """ Specifies a triangular learning rate schedule where peak is reached at `warmup`*`t_total`-th (as provided to BertAdam) training step.
+        After `t_total`-th training step, learning rate is zero. """
+    if x < warmup:
+        return x/warmup
+    return max((x-1.)/(warmup-1.), 0)
+
+SCHEDULES = {
+    'warmup_cosine': warmup_cosine,
+    'warmup_constant': warmup_constant,
+    'warmup_linear': warmup_linear,
+}
+
+
+class BertAdam(Optimizer):
+    """Implements BERT version of Adam algorithm with weight decay fix.
+    Params:
+        lr: learning rate
+        warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
+        t_total: total number of training steps for the learning
+            rate schedule, -1 means constant learning rate. Default: -1
+        schedule: schedule to use for the warmup (see above). Default: 'warmup_linear'
+        b1: Adam's b1. Default: 0.9
+        b2: Adam's b2. Default: 0.999
+        e: Adam's epsilon. Default: 1e-6
+        weight_decay: Weight decay. Default: 0.01
+        max_grad_norm: Maximum norm for the gradients (-1 means no clipping).
Default: 1.0 + """ + def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear', + b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, + max_grad_norm=1.0): + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr)) + if schedule not in SCHEDULES: + raise ValueError("Invalid schedule parameter: {}".format(schedule)) + if not 0.0 <= warmup < 1.0 and not warmup == -1: + raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup)) + if not 0.0 <= b1 < 1.0: + raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1)) + if not 0.0 <= b2 < 1.0: + raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2)) + if not e >= 0.0: + raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e)) + defaults = dict(lr=lr, schedule=schedule, warmup=warmup, t_total=t_total, + b1=b1, b2=b2, e=e, weight_decay=weight_decay, + max_grad_norm=max_grad_norm) + super(BertAdam, self).__init__(params, defaults) + + def get_lr(self): + lr = [] + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + state = self.state[p] + if len(state) == 0: + return [0] + if group['t_total'] != -1: + schedule_fct = SCHEDULES[group['schedule']] + lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup']) + else: + lr_scheduled = group['lr'] + lr.append(lr_scheduled) + return lr + + def step(self, closure=None): + """Performs a single optimization step. + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['next_m'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['next_v'] = torch.zeros_like(p.data) + + next_m, next_v = state['next_m'], state['next_v'] + beta1, beta2 = group['b1'], group['b2'] + + # Add grad clipping + if group['max_grad_norm'] > 0: + clip_grad_norm_(p, group['max_grad_norm']) + + # Decay the first and second moment running average coefficient + # In-place operations to update the averages at the same time + # next_m.mul_(beta1).add_(1 - beta1, grad) --> pytorch 1.7 + next_m.mul_(beta1).add_(grad, alpha=1 - beta1) + # next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad) --> pytorch 1.7 + next_v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + update = next_m / (next_v.sqrt() + group['e']) + + # Just adding the square of the weights to the loss function is *not* + # the correct way of using L2 regularization/weight decay with Adam, + # since that will interact with the m and v parameters in strange ways. + # + # Instead we want to decay the weights in a manner that doesn't interact + # with the m/v parameters. This is equivalent to adding the square + # of the weights to the loss with plain (non-momentum) SGD. 
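+                # In symbols: update = m_t / (sqrt(v_t) + eps) + wd * theta, and below
+                # theta <- theta - lr_t * update. Note that, unlike standard Adam, BertAdam
+                # applies no bias correction to m_t and v_t.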
+                if group['weight_decay'] > 0.0:
+                    update += group['weight_decay'] * p.data
+
+                if group['t_total'] != -1:
+                    schedule_fct = SCHEDULES[group['schedule']]
+                    progress = state['step']/group['t_total']
+                    lr_scheduled = group['lr'] * schedule_fct(progress, group['warmup'])
+                else:
+                    lr_scheduled = group['lr']
+
+                update_with_lr = lr_scheduled * update
+                p.data.add_(-update_with_lr)
+
+                state['step'] += 1
+
+        return loss
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/modules/tokenization.py b/yc2_univl/backup/pdvc/modules/tokenization.py
new file mode 100644
index 0000000000000000000000000000000000000000..183c81000f82aae59295f8d8572b6bcf67891790
--- /dev/null
+++ b/yc2_univl/backup/pdvc/modules/tokenization.py
@@ -0,0 +1,408 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import unicodedata
+import os
+import sys
+import logging
+
+from .file_utils import cached_path
+
+logger = logging.getLogger(__name__)
+PRETRAINED_VOCAB_ARCHIVE_MAP = {
+    'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
+    'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
+    'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
+    'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
+    'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
+    'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
+    'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
+}
+PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = {
+    'bert-base-uncased': 512,
+    'bert-large-uncased': 512,
+    'bert-base-cased': 512,
+    'bert-large-cased': 512,
+    'bert-base-multilingual-uncased': 512,
+    'bert-base-multilingual-cased': 512,
+    'bert-base-chinese': 512,
+}
+VOCAB_NAME = 'vocab.txt'
+
+
+def load_vocab(vocab_file):
+    """Loads a vocabulary file into a dictionary."""
+    vocab = collections.OrderedDict()
+    index = 0
+    with open(vocab_file, "r", encoding="utf-8") as reader:
+        while True:
+            token = reader.readline()
+            if not token:
+                break
+            token = token.strip()
+            vocab[token] = index
+            index += 1
+    return vocab
+
+
+def whitespace_tokenize(text):
+    """Runs basic whitespace cleaning and splitting on a piece of text."""
+    text = text.strip()
+    if not text:
+        return []
+    tokens = text.split()
+    return tokens
+
+
+class BertTokenizer(object):
+    """Runs end-to-end tokenization: punctuation splitting + wordpiece"""
+
+    def __init__(self, vocab_file, do_lower_case=True, max_len=None, never_split=("[UNK]", "[SEP]", "[MASK]", "[CLS]")):
+        if not os.path.isfile(vocab_file):
+            raise ValueError(
+                "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
+                "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
+        self.vocab = load_vocab(vocab_file)
+        self.ids_to_tokens = collections.OrderedDict(
+            [(ids, tok) for tok, ids in self.vocab.items()])
+        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, never_split=never_split)
+        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+        self.max_len = max_len if max_len is not None else int(1e12)
+
+    def tokenize(self, text):
+        split_tokens = []
+        for token in self.basic_tokenizer.tokenize(text):
+            for sub_token in self.wordpiece_tokenizer.tokenize(token):
+                split_tokens.append(sub_token)
+        return split_tokens
+
+    def convert_tokens_to_ids(self, tokens):
+        """Converts a sequence of tokens into ids using the vocab."""
+        ids = []
+        for token in tokens:
+            if token not in self.vocab:
+                ids.append(self.vocab["[UNK]"])
+                logger.error("Cannot find token '{}' in vocab. Using [UNK] instead".format(token))
+            else:
+                ids.append(self.vocab[token])
+        if len(ids) > self.max_len:
+            raise ValueError(
+                "Token indices sequence length is longer than the specified maximum "
+                "sequence length for this BERT model ({} > {}). Running this"
+                " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
+            )
+        return ids
+
+    def convert_ids_to_tokens(self, ids):
+        """Converts a sequence of ids into tokens using the vocab."""
+        tokens = []
+        for i in ids:
+            tokens.append(self.ids_to_tokens[i])
+        return tokens
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs):
+        """
+        Instantiate a BertTokenizer from a pre-trained vocabulary file.
+        Download and cache the pre-trained model file if needed.
+        """
+        vocab_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name)
+        if os.path.exists(vocab_file) is False:
+            if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP:
+                vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name]
+            else:
+                vocab_file = pretrained_model_name
+        if os.path.isdir(vocab_file):
+            vocab_file = os.path.join(vocab_file, VOCAB_NAME)
+        # redirect to the cache, if necessary
+        try:
+            resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir)
+        except FileNotFoundError:
+            logger.error(
+                "Model name '{}' was not found. "
+                "We assumed '{}' was a path or url but couldn't find any file "
+                "associated to this path or url.".format(
+                    pretrained_model_name,
+                    vocab_file))
+            return None
+        if resolved_vocab_file == vocab_file:
+            logger.info("loading vocabulary file {}".format(vocab_file))
+        else:
+            logger.info("loading vocabulary file {} from cache at {}".format(
+                vocab_file, resolved_vocab_file))
+        if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
+            # if we're using a pretrained model, ensure the tokenizer won't index sequences longer
+            # than the number of positional embeddings
+            max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name]
+            kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
+            kwargs['never_split'] = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")
+
+        # Instantiate tokenizer.
+        tokenizer = cls(resolved_vocab_file, *inputs, **kwargs)
+
+        return tokenizer
+
+    def add_tokens(self, new_tokens, model):
+        """
+        Add a list of new tokens to the tokenizer class. If the new tokens are not in the
+        vocabulary, they are added to it with indices starting from the length of the current vocabulary.
+        Args:
+            new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assigns the index of the ``unk_token`` to them).
+        Returns:
+            Number of tokens added to the vocabulary.
+        Examples::
+            # Let's see how to increase the vocabulary of Bert model and tokenizer
+            tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+            model = BertModel.from_pretrained('bert-base-uncased')
+            num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2'], model)
+            print('We have added', num_added_toks, 'tokens')
+            model.resize_token_embeddings(len(tokenizer))  # Notice: resize_token_embeddings expects to receive the full size of the new vocabulary, i.e. the length of the tokenizer.
+        """
+
+        to_add_tokens = []
+        for token in new_tokens:
+            assert isinstance(token, str)
+            if token in self.vocab:
+                continue
+            to_add_tokens.append(token)
+            # logger.info("Adding %s to the vocabulary", token)
+
+        vocab = collections.OrderedDict()
+        for token in self.vocab.keys():
+            vocab[token] = self.vocab[token]
+        for token in to_add_tokens:
+            vocab[token] = len(vocab)
+        self.vocab = self.wordpiece_tokenizer.vocab = vocab
+        self.ids_to_tokens = collections.OrderedDict(
+            [(ids, tok) for tok, ids in self.vocab.items()])
+
+        model.resize_token_embeddings(new_num_tokens=len(vocab))
+        return len(to_add_tokens)
+
+class BasicTokenizer(object):
+    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+    def __init__(self, do_lower_case=True, never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
+        """Constructs a BasicTokenizer.
+
+        Args:
+            do_lower_case: Whether to lower case the input.
+        """
+        self.do_lower_case = do_lower_case
+        self.never_split = never_split
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text."""
+        text = self._clean_text(text)
+        # This was added on November 1st, 2018 for the multilingual and Chinese
+        # models. This is also applied to the English models now, but it doesn't
+        # matter since the English models were not trained on any Chinese data
+        # and generally don't have any Chinese data in them (there are Chinese
+        # characters in the vocabulary because Wikipedia does have some Chinese
+        # words in the English Wikipedia.).
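+        # e.g. "ab你好" is padded to "ab 你 好" so whitespace_tokenize below yields
+        # ['ab', '你', '好'] (illustrative input; each CJK ideograph becomes its own token).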
+ text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + +class WordpieceTokenizer(object): + """Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. 
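+            Tokens that cannot be fully matched against the vocabulary, or that exceed
+            max_input_chars_per_word, come back as the single unk_token, e.g. ["[UNK]"].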
+ """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/yc2_univl/backup/pdvc/modules/until_config.py b/yc2_univl/backup/pdvc/modules/until_config.py new file mode 100644 index 0000000000000000000000000000000000000000..596c157aa23c82eb33c1fb2e07d9b006a52990e9 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/until_config.py @@ -0,0 +1,126 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import copy +import json +import logging +import tarfile +import tempfile +import shutil +import torch +from .file_utils import cached_path + +logger = logging.getLogger(__name__) + +class PretrainedConfig(object): + + pretrained_model_archive_map = {} + config_name = "" + weights_name = "" + + @classmethod + def get_config(cls, pretrained_model_name, cache_dir, type_vocab_size, state_dict, task_config=None): + archive_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), pretrained_model_name) + if os.path.exists(archive_file) is False: + if pretrained_model_name in cls.pretrained_model_archive_map: + archive_file = cls.pretrained_model_archive_map[pretrained_model_name] + else: + archive_file = pretrained_model_name + + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + except FileNotFoundError: + if task_config is None or task_config.local_rank == 0: + logger.error( + "Model name '{}' was not found in model name list. " + "We assumed '{}' was a path or url but couldn't find any file " + "associated to this path or url.".format( + pretrained_model_name, + archive_file)) + return None + if resolved_archive_file == archive_file: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {}".format(archive_file)) + else: + if task_config is None or task_config.local_rank == 0: + logger.info("loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file)) + tempdir = None + if os.path.isdir(resolved_archive_file): + serialization_dir = resolved_archive_file + else: + # Extract archive to temp dir + tempdir = tempfile.mkdtemp() + if task_config is None or task_config.local_rank == 0: + logger.info("extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir)) + with tarfile.open(resolved_archive_file, 'r:gz') as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + # Load config + config_file = os.path.join(serialization_dir, cls.config_name) + config = cls.from_json_file(config_file) + config.type_vocab_size = type_vocab_size + if task_config is None or task_config.local_rank == 0: + logger.info("Model config {}".format(config)) + + if state_dict is None: + weights_path = os.path.join(serialization_dir, cls.weights_name) + if os.path.exists(weights_path): + state_dict = torch.load(weights_path, map_location='cpu') + else: + if task_config is None or task_config.local_rank == 0: + logger.info("Weight doesn't exsits. 
{}".format(weights_path)) + + if tempdir: + # Clean up temp dir + shutil.rmtree(tempdir) + + return config, state_dict + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = cls(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/modules/until_module.py b/yc2_univl/backup/pdvc/modules/until_module.py new file mode 100644 index 0000000000000000000000000000000000000000..d550638157f8aeb2116a9cce022b2c563fd3491b --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/until_module.py @@ -0,0 +1,251 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model.""" + +import logging +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F +import math +from pdvc.modules.until_config import PretrainedConfig + +logger = logging.getLogger(__name__) + +def gelu(x): + """Implementation of the gelu activation function. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + +def swish(x): + return x * torch.sigmoid(x) + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} + +class LayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(LayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + +class PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. 
+ """ + def __init__(self, config, *inputs, **kwargs): + super(PreTrainedModel, self).__init__() + if not isinstance(config, PretrainedConfig): + raise ValueError( + "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + "To create a model from a Google pretrained model use " + "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + self.__class__.__name__, self.__class__.__name__ + )) + self.config = config + + def init_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, LayerNorm): + if 'beta' in dir(module) and 'gamma' in dir(module): + module.beta.data.zero_() + module.gamma.data.fill_(1.0) + else: + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def resize_token_embeddings(self, new_num_tokens=None): + raise NotImplementedError + + @classmethod + def init_preweight(cls, model, state_dict, prefix=None, task_config=None): + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + if prefix is not None: + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + old_keys.append(key) + new_keys.append(prefix + key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(model, prefix='') + + if prefix is None and (task_config is None or task_config.local_rank == 0): + logger.info("-" * 20) + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(missing_keys))) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(unexpected_keys))) + if len(error_msgs) > 0: + logger.error("Weights from pretrained model cause errors in {}: {}" + .format(model.__class__.__name__, "\n " + "\n ".join(error_msgs))) + + return model + + @property + def dtype(self): + """ + :obj:`torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). 
+ """ + try: + return next(self.parameters()).dtype + except StopIteration: + # For nn.DataParallel compatibility in PyTorch 1.5 + def find_tensor_attributes(module: nn.Module): + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = self._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].dtype + + @classmethod + def from_pretrained(cls, config, state_dict=None, *inputs, **kwargs): + """ + Instantiate a PreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + """ + # Instantiate model. + model = cls(config, *inputs, **kwargs) + if state_dict is None: + return model + model = cls.init_preweight(model, state_dict) + + return model + +################################## +###### LOSS FUNCTION ############# +################################## +class CrossEn(nn.Module): + def __init__(self,): + super(CrossEn, self).__init__() + + def forward(self, sim_matrix): + logpt = F.log_softmax(sim_matrix, dim=-1) + logpt = torch.diag(logpt) + nce_loss = -logpt + sim_loss = nce_loss.mean() + return sim_loss + +class MILNCELoss(nn.Module): + def __init__(self, batch_size=1, n_pair=1,): + super(MILNCELoss, self).__init__() + self.batch_size = batch_size + self.n_pair = n_pair + torch_v = float(".".join(torch.__version__.split(".")[:2])) + self.bool_dtype = torch.bool if torch_v >= 1.3 else torch.uint8 + + def forward(self, sim_matrix): + mm_mask = np.eye(self.batch_size) + mm_mask = np.kron(mm_mask, np.ones((self.n_pair, self.n_pair))) + mm_mask = torch.tensor(mm_mask).float().to(sim_matrix.device) + + from_text_matrix = sim_matrix + mm_mask * -1e12 + from_video_matrix = sim_matrix.transpose(1, 0) + + new_sim_matrix = torch.cat([from_video_matrix, from_text_matrix], dim=-1) + logpt = F.log_softmax(new_sim_matrix, dim=-1) + + mm_mask_logpt = torch.cat([mm_mask, torch.zeros_like(mm_mask)], dim=-1) + masked_logpt = logpt + (torch.ones_like(mm_mask_logpt) - mm_mask_logpt) * -1e12 + + new_logpt = -torch.logsumexp(masked_logpt, dim=-1) + + logpt_choice = torch.zeros_like(new_logpt) + mark_ind = torch.arange(self.batch_size).to(sim_matrix.device) * self.n_pair + (self.n_pair//2) + logpt_choice[mark_ind] = 1 + sim_loss = new_logpt.masked_select(logpt_choice.to(dtype=self.bool_dtype)).mean() + return sim_loss + +class MaxMarginRankingLoss(nn.Module): + def __init__(self, + margin=1.0, + negative_weighting=False, + batch_size=1, + n_pair=1, + hard_negative_rate=0.5, + ): + super(MaxMarginRankingLoss, self).__init__() + self.margin = margin + self.n_pair = n_pair + self.batch_size = batch_size + easy_negative_rate = 1 - hard_negative_rate + self.easy_negative_rate = easy_negative_rate + self.negative_weighting = negative_weighting + if n_pair > 1 and batch_size > 1: + alpha = easy_negative_rate / ((batch_size - 1) * (1 - easy_negative_rate)) + mm_mask = (1 - alpha) * np.eye(self.batch_size) + alpha + mm_mask = np.kron(mm_mask, np.ones((n_pair, n_pair))) + mm_mask = torch.tensor(mm_mask) * (batch_size * (1 - easy_negative_rate)) + self.mm_mask = mm_mask.float() + + def forward(self, x): + d = torch.diag(x) + max_margin = F.relu(self.margin + x - d.view(-1, 1)) + \ + F.relu(self.margin + x - d.view(1, -1)) + if self.negative_weighting and self.n_pair > 1 and self.batch_size > 1: + max_margin = max_margin * self.mm_mask.to(max_margin.device) + return max_margin.mean() \ No newline at end of file diff --git 
a/yc2_univl/backup/pdvc/modules/visual-base/visual_config.json b/yc2_univl/backup/pdvc/modules/visual-base/visual_config.json new file mode 100644 index 0000000000000000000000000000000000000000..324fcb6e7ba63166767adf9afa82324412247a48 --- /dev/null +++ b/yc2_univl/backup/pdvc/modules/visual-base/visual_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 1, + "vocab_size": 1024 +} diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..5f86c9097b3b6f4b7f50b9d70f7cd58b2f386871 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/PKG-INFO @@ -0,0 +1,6 @@ +Metadata-Version: 2.1 +Name: MultiScaleDeformableAttention +Version: 1.0 +Summary: PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention +Home-page: https://github.com/fundamentalvision/Deformable-DETR +Author: Weijie Su diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc251e74aff93cae99a730109d3f696ef326b210 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/SOURCES.txt @@ -0,0 +1,13 @@ +setup.py +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +MultiScaleDeformableAttention.egg-info/PKG-INFO +MultiScaleDeformableAttention.egg-info/SOURCES.txt +MultiScaleDeformableAttention.egg-info/dependency_links.txt +MultiScaleDeformableAttention.egg-info/top_level.txt +functions/__init__.py +functions/ms_deform_attn_func.py +modules/__init__.py +modules/ms_deform_attn.py +modules/ms_deform_attn_for_caption.py \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..25d8f7790d14d04a74c6acec779aedb3688ef630 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/MultiScaleDeformableAttention.egg-info/top_level.txt @@ -0,0 +1,3 @@ +MultiScaleDeformableAttention +functions +modules diff --git a/yc2_univl/backup/pdvc/ops/__init__.py b/yc2_univl/backup/pdvc/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ed3d8ddca46efead59543bfd2f1961790abdc96 Binary 
files /dev/null and b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c722836c6716e99f5a33542ebc2461e4540b9c0 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..affe1b85a7c92a8c1ecfca0d0b2c329ce77bf383 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/MultiScaleDeformableAttention.cpython-37m-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5720c8c8f59f4168baf51ec63ba9c5f5e90d5abb998c0fbdd6170547d23a13 +size 7942000 diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # The compiled CUDA extension is optional; without it, only the pure-PyTorch
+    # fallback (ms_deform_attn_core_pytorch) is usable.
+    pass
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2), the first item of the last dim is the x axis (corresponding to w),
+        # and the second is the y axis (corresponding to h).
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # for debug and test only,
+    # need to use cuda version instead
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum H*W, M_: head number, D_: feature dim of each head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of scales, P_: number of sampled key points
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert sampling locations from range [0, 1] to [-1, 1]
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2), the first item of the last dim is the x axis (corresponding to w),
+        # and the second is the y axis (corresponding to h).
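+        # F.grid_sample reads (x, y) grids in [-1, 1]; in this repo's 1D usage H_ is 1 and
+        # the y coordinate is held constant, so the bilinear lookup below is effectively a
+        # 1D interpolation along the temporal axis W_.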
+ # N_*M_, D_, Lq_, P_ + sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, + mode='bilinear', padding_mode='border', align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) + attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) + + if return_value: + return torch.stack(sampling_value_list, dim=-2) + #(N_ * M_, D_, Lq_, L_* P_) * (N_*M_, 1, Lq_, L_*P_) --> (N_*M_, D_, Lq_) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) + return output.transpose(1, 2).contiguous() diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn import MSDeformAttn +from .ms_deform_attn_for_caption import MSDeformAttnCap \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
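+        # The bias init above gives each head the x-component of a distinct unit direction,
+        # scaled by the sampling-point index, so the initial sampling points fan out from
+        # the reference location at different temporal offsets before any training.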
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                             or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1D temporal sampling locations to 2D (with a dummy H = 1 axis) so the
+        # generic 2D deformable-attention kernels can be reused.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/build/lib.linux-x86_64-cpython-37/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
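+        # Compared with MSDeformAttn, this caption variant projects sampling offsets and
+        # attention weights from a 2*d_model query (note the doubled Linear input width
+        # above), and its forward() returns the raw per-point sampled values via
+        # return_value=True instead of their attention-weighted sum.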
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                             or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1D temporal sampling locations to 2D (with a dummy H = 1 axis) so the
+        # generic 2D sampling utilities can be reused.
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps
new file mode 100644
index 0000000000000000000000000000000000000000..2bef29d420f02b4282644cba394698912212dab8
Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_deps differ
diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
new file mode 100644
index 0000000000000000000000000000000000000000..fd78ae63cd064bb569f9279931f2e0668833f50d
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/.ninja_log
@@ -0,0 +1,4 @@
+# ninja log v5
+0 2930 1685020146224081877 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o 8f7db54445222f0
+0 10580 1685020153869972218 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o 91f10249ca524b9b
+0 13795
1685020157081510628 /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o 3e48c35d2c631cee diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..9d156fb45877ed14f310b8ae1f889c048fe0fa2b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/build.ninja @@ -0,0 +1,30 @@ +ninja_required_version = 1.3 +cxx = c++ +nvcc = /usr/local/cuda/bin/nvcc + +cflags = -pthread -B /home/liuhuabin/miniconda3/envs/PDVC/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 +cuda_cflags = -DWITH_CUDA -I/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/TH -I/home/liuhuabin/miniconda3/envs/PDVC/lib/python3.7/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/liuhuabin/miniconda3/envs/PDVC/include/python3.7m -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=MultiScaleDeformableAttention -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o: compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o: cuda_compile /cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu +build /cpfs01/user/liuhuabin/PDVC/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o: compile 
/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.cpp + + + + + diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o new file mode 100644 index 0000000000000000000000000000000000000000..d30f1ff54acc23e3e0f5ea22b3a8828fdd2c44b7 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cpu/ms_deform_attn_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59afa2abc476414b1faa6816920a93293fc9e71aa96d790c80760a879f5d0682 +size 1437672 diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..d9274a1b895a7c123eab8231e2e24c2ea6629581 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/cuda/ms_deform_attn_cuda.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973f1d16162f782172da95253065226cd068f45430bbc1a8920929ffda09947d +size 920176 diff --git a/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o new file mode 100644 index 0000000000000000000000000000000000000000..e771be34bcbacfa86a2e41f1728b9d0b2fef3a85 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/build/temp.linux-x86_64-cpython-37/cpfs01/user/liuhuabin/PDVC/pdvc/ops/src/vision.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad8100cd431dec4d7ef8dc5d144c90402c71b4b41a772e5f120c38b8fe9aa0e +size 10423896 diff --git a/yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg b/yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg new file mode 100644 index 0000000000000000000000000000000000000000..dc5bbc86e1f4304b490711416d30dbeecec3a2b8 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/dist/MultiScaleDeformableAttention-1.0-py3.7-linux-x86_64.egg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ad69121c719dc533912a5233ee2ba4d895fd745283dc122601f20b0da2a519 +size 2223428 diff --git a/yc2_univl/backup/pdvc/ops/functions/__init__.py b/yc2_univl/backup/pdvc/ops/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f682455af45d3687f0266acce6018741fe7c303 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00b83c1e1d8810a77347e3d76609cdf347898186 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09ce00b445b4c8d76b027f013de6cb094dae82dc Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..222160988ac28f5eba55fe2acff1a6b176b3429b Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fc0981ca1144f3eb8a7166b570fb797f8004a16 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/functions/__pycache__/ms_deform_attn_func.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/functions/ms_deform_attn_func.py b/yc2_univl/backup/pdvc/ops/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..c59ddc33cf54f23c8b38e192c1421f0c79ebd38b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/functions/ms_deform_attn_func.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+try:
+    import MultiScaleDeformableAttention as MSDA
+except ImportError:
+    # the compiled CUDA extension is optional; ms_deform_attn_core_pytorch below
+    # serves as the fallback when it has not been built
+    MSDA = None
+
+class MSDeformAttnFunction(Function):
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        # sampling_locations: (..., 2); the first item of the last dim is the x axis (corresponding to w),
+        # the second item is the y axis (corresponding to h).
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights, return_value=False):
+    # for debug and test only,
+    # need to use cuda version instead
+    N_, S_, M_, D_ = value.shape  # N_: batch size, S_: \sum_l H_l*W_l, M_: number of heads, D_: feature dim of each head
+
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape  # Lq_: number of queries, L_: number of scales (levels), P_: number of sampled key points
+
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    sampling_grids = 2 * sampling_locations - 1  # convert from range [0, 1] to [-1, 1]
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # sampling_grid_l_: (..., 2); the first item of the last dim is the x axis (corresponding to w),
+        # the second item is the y axis (corresponding to h).
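+        # grid_sample expects a 4-D input (N, C, H, W) and a grid in [-1, 1]; in this
+        # repo the forward() callers stack a height of 1 into the spatial shapes, so
+        # the bilinear lookup below degenerates to linear interpolation along the
+        # temporal axis.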
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='border', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+
+    if return_value:
+        return torch.stack(sampling_value_list, dim=-2)
+    # (N_*M_, D_, Lq_, L_*P_) * (N_*M_, 1, Lq_, L_*P_) --> (N_*M_, D_, Lq_)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/yc2_univl/backup/pdvc/ops/make.sh b/yc2_univl/backup/pdvc/ops/make.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a7e4320108ecd2f02d1824505849850b0c69d319
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/make.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+python setup.py build install
diff --git a/yc2_univl/backup/pdvc/ops/modules/__init__.py b/yc2_univl/backup/pdvc/ops/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ceef895ac021db2b6b1762dda3d65c433e09e6e9
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn import MSDeformAttn +from .ms_deform_attn_for_caption import MSDeformAttnCap \ No newline at end of file diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd29db0d448db6cc3ebfcb499cb6105d2f745555 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc85ead761d81b2d819429824ee2393e9f50a6ae Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/__init__.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1de99e2b9ab1efc42b399837d8cfd7a09a3e2ef1 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12c1ccbe61ed8ca360ce969e012e60a89d05cece Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..671fa7d00552b0d0913bf502750b061574f7b3f2 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-37.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a23f676c0714c277a628441a7459d2724f62b61 Binary files /dev/null and b/yc2_univl/backup/pdvc/ops/modules/__pycache__/ms_deform_attn_for_caption.cpython-38.pyc differ diff --git a/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn.py b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7983d9f64fcff74e89823ad6d7164255f26dda52 --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn.py @@ -0,0 +1,126 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points ) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
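+
+    # Minimal usage sketch (illustrative only, not part of the original source;
+    # shapes follow the forward() docstring below, with n_levels temporal scales):
+    #   attn = MSDeformAttn(d_model=256, n_levels=4, n_heads=8, n_points=4)
+    #   query:            (N, Len_q, 256)
+    #   reference_points: (N, Len_q, 4, 1), normalized to [0, 1]
+    #   input_flatten:    (N, T_0+T_1+T_2+T_3, 256)
+    #   output = attn(query, reference_points, input_flatten,
+    #                 input_spatial_shapes, input_level_start_index)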
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                              or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to 2-D grid coordinates: x carries the
+        # temporal position, y is fixed at 0.5, and every level becomes a (1, T_l) "image".
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        if query.device.type == 'cuda':
+            output = MSDeformAttnFunction.apply(
+                value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights,
+                self.im2col_step)
+        else:
+            output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights)
+        output = self.output_proj(output)
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fdc1c220e13146864818a0f79225ca47c7394f
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/modules/ms_deform_attn_for_caption.py
@@ -0,0 +1,123 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from ..functions import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttnCap(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4,): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.attention_weights = nn.Linear(2 * d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2) + grid_init = grid_init[..., 0].repeat(1, self.n_levels, self.n_points) + for i in range(self.n_points): + grid_init[:, :, i] *= i + 1 + grid_init = grid_init - grid_init.mean(2, keepdim=True) + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
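+
+    # Unlike MSDeformAttn, this caption variant projects offsets and weights from a
+    # query of width 2 * d_model (e.g. a hidden state concatenated with an event
+    # feature -- the exact pairing is up to the caller), and its forward() returns
+    # the raw per-point sampled values (return_value=True) instead of applying
+    # output_proj.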
+
+    def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
+        """
+        :param query (N, Length_{query}, C)
+        :param reference_points (N, Length_{query}, n_levels, 1), range in [0, 1], including padding area
+                              or (N, Length_{query}, n_levels, 2), add additional (c, l) to form reference boxes
+        :param input_flatten (N, \sum_{l=0}^{L-1} T_l, C)
+        :param input_spatial_shapes (n_levels, ), [T_0, T_1, ..., T_{L-1}]
+        :param input_level_start_index (n_levels, ), [0, T_0, T_0+T_1, ...]
+        :param input_padding_mask (N, \sum_{l=0}^{L-1} T_l), True for padding elements, False for non-padding elements
+
+        :return output (N, Length_{query}, C)
+        """
+        N, Len_q, _ = query.shape
+        N, Len_in, _ = input_flatten.shape
+        assert input_spatial_shapes.sum() == Len_in
+
+        value = self.value_proj(input_flatten)
+        if input_padding_mask is not None:
+            value = value.masked_fill(input_padding_mask[..., None], float(0))
+        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
+        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
+        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
+        # sampling_locations: N, Len_q, n_heads, n_levels, n_points
+        if reference_points.shape[-1] == 1:
+            offset_normalizer = input_spatial_shapes
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / offset_normalizer[None, None, None, :, None]
+        elif reference_points.shape[-1] == 2:
+            sampling_locations = reference_points[:, :, None, :, None, 0] \
+                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 1] * 0.5
+        else:
+            raise ValueError(
+                'Last dim of reference_points must be 1 or 2, but got {} instead.'.format(reference_points.shape[-1]))
+
+        # Lift the 1-D temporal locations to 2-D grid coordinates: x carries the
+        # temporal position, y is fixed at 0.5, and every level becomes a (1, T_l) "image".
+        if True:
+            sampling_locations = torch.stack(
+                (sampling_locations, 0.5 * sampling_locations.new_ones(sampling_locations.shape)), -1)
+            input_spatial_shapes = torch.stack([input_spatial_shapes.new_ones(input_spatial_shapes.shape), input_spatial_shapes], -1)
+
+        output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights,
+                                             return_value=True)
+
+        return output
diff --git a/yc2_univl/backup/pdvc/ops/setup.py b/yc2_univl/backup/pdvc/ops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0131bc21cf1b45b90fcf174e2c53e4c08e9c641
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/setup.py
@@ -0,0 +1,71 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+import os
+import glob
+
+import torch
+
+from torch.utils.cpp_extension import CUDA_HOME
+from torch.utils.cpp_extension import CppExtension
+from torch.utils.cpp_extension import CUDAExtension
+
+from setuptools import find_packages
+from setuptools import setup
+
+requirements = ["torch", "torchvision"]
+
+def get_extensions():
+    this_dir = os.path.dirname(os.path.abspath(__file__))
+    extensions_dir = os.path.join(this_dir, "src")
+
+    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
+    source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
+    source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
+
+    sources = main_file + source_cpu
+    extension = CppExtension
+    extra_compile_args = {"cxx": []}
+    define_macros = []
+
+    if torch.cuda.is_available() and CUDA_HOME is not None:
+        extension = CUDAExtension
+        sources += source_cuda
+        define_macros += [("WITH_CUDA", None)]
+        extra_compile_args["nvcc"] = [
+            "-DCUDA_HAS_FP16=1",
+            "-D__CUDA_NO_HALF_OPERATORS__",
+            "-D__CUDA_NO_HALF_CONVERSIONS__",
+            "-D__CUDA_NO_HALF2_OPERATORS__",
+        ]
+    else:
+        raise NotImplementedError('CUDA is not available')
+
+    sources = [os.path.join(extensions_dir, s) for s in sources]
+    include_dirs = [extensions_dir]
+    ext_modules = [
+        extension(
+            "MultiScaleDeformableAttention",
+            sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        )
+    ]
+    return ext_modules
+
+setup(
+    name="MultiScaleDeformableAttention",
+    version="1.0",
+    author="Weijie Su",
+    url="https://github.com/fundamentalvision/Deformable-DETR",
+    description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention",
+    packages=find_packages(exclude=("configs", "tests",)),
+    ext_modules=get_extensions(),
+    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
+)
diff --git a/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e1bf854de1f3860d20b6fef5c1a17817c268e70a
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.cpp
@@ -0,0 +1,41 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on CPU");
+}
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    AT_ERROR("Not implemented on CPU");
+}
+
diff --git a/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..81b7b58a3d9502bbb684dc84687a526dedf94cae
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cpu/ms_deform_attn_cpu.h
@@ -0,0 +1,33 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
+
diff --git a/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d6d583647cce987196d5ad1968a8a365a379e774
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.cu
@@ -0,0 +1,153 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#include <vector>
+#include "cuda/ms_deform_im2col_cuda.cuh"
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
+
+    const int batch_n = im2col_step_;
+    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto columns = output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
+            ms_deformable_im2col_cuda<scalar_t>(at::cuda::getCurrentCUDAStream(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                columns.data<scalar_t>());
+
+        }));
+    }
+
+    output = output.view({batch, num_query, num_heads*channels});
+
+    return output;
+}
+
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+    AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+    AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto grad_value = at::zeros_like(value);
+    auto grad_sampling_loc = at::zeros_like(sampling_loc);
+    auto grad_attn_weight = at::zeros_like(attn_weight);
+
+    const int batch_n = im2col_step_;
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto grad_output_g = grad_output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] {
+            ms_deformable_col2im_cuda<scalar_t>(at::cuda::getCurrentCUDAStream(),
+                grad_output_g.data<scalar_t>(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                grad_value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                grad_sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                grad_attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size);
+
+        }));
+    }
+
+    return {
+        grad_value, grad_sampling_loc, grad_attn_weight
+    };
+}
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7ae53f99c820ce6193b608ad344550348a0b42c
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_attn_cuda.h
@@ -0,0 +1,30 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
diff --git a/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5635be7822e7cbfb8b5524185f213a9368a91dce
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/cuda/ms_deform_im2col_cuda.cuh
@@ -0,0 +1,1328 @@
+/*!
+**************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************
+* Modified from DCN (https://github.com/msracver/Deformable-ConvNets)
+* Copyright (c) 2018 Microsoft
+**************************************************************************
+*/
+
+#include <cstdio>
+#include <algorithm>
+#include <cstring>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+#include <THC/THCAtomics.cuh>
+
+// assign indices to blocks with a uniform (grid-sized) stride
+#define CUDA_KERNEL_LOOP(i, n)                          \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \
+      i < (n);                                          \
+      i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+inline int GET_BLOCKS(const int N, const int num_threads)
+{
+  return (N + num_threads - 1) / num_threads;
+}
+
+
+template <typename scalar_t>
+__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data,
+                                                   const int &height, const int &width, const int &nheads, const int &channels,
+                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+  }
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data,
+                                               const int &height, const int &width, const int &nheads, const int &channels,
+                                               const scalar_t &h, const scalar_t &w, const int &m, const int &c,
+                                               const scalar_t &top_grad,
+                                               const scalar_t &attn_weight,
+                                               scalar_t* &grad_value,
+                                               scalar_t* grad_sampling_loc,
+                                               scalar_t* grad_attn_weight)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+  const scalar_t top_grad_value = top_grad * attn_weight;
+  scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+    grad_h_weight -= hw * v1;
+    grad_w_weight -= hh * v1;
+    atomicAdd(grad_value+ptr1, w1*top_grad_value);
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+    grad_h_weight -= lw * v2;
+    grad_w_weight += hh * v2;
+    atomicAdd(grad_value+ptr2, w2*top_grad_value);
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+    grad_h_weight += hw * v3;
+    grad_w_weight -= lh * v3;
+    atomicAdd(grad_value+ptr3, w3*top_grad_value);
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+    grad_h_weight += lw * v4;
+    grad_w_weight += lh * v4;
+    atomicAdd(grad_value+ptr4, w4*top_grad_value);
+  }
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  *grad_attn_weight = top_grad * val;
+  *grad_sampling_loc = width * grad_w_weight * top_grad_value;
+  *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value;
+}
+
+
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data,
+                                                  const int &height, const int &width, const int &nheads, const int &channels,
+                                                  const scalar_t &h, const scalar_t &w, const int &m, const int &c,
+                                                  const scalar_t &top_grad,
+                                                  const scalar_t &attn_weight,
+                                                  scalar_t* &grad_value,
+                                                  scalar_t* grad_sampling_loc,
+                                                  scalar_t* grad_attn_weight)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
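+  // Precompute flat offsets of the four bilinear corner taps: moving one row (h)
+  // advances width*nheads*channels elements, one column (w) advances nheads*channels.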
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+  const scalar_t top_grad_value = top_grad * attn_weight;
+  scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+    grad_h_weight -= hw * v1;
+    grad_w_weight -= hh * v1;
+    atomicAdd(grad_value+ptr1, w1*top_grad_value);
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+    grad_h_weight -= lw * v2;
+    grad_w_weight += hh * v2;
+    atomicAdd(grad_value+ptr2, w2*top_grad_value);
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+    grad_h_weight += hw * v3;
+    grad_w_weight -= lh * v3;
+    atomicAdd(grad_value+ptr3, w3*top_grad_value);
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+    grad_h_weight += lw * v4;
+    grad_w_weight += lh * v4;
+    atomicAdd(grad_value+ptr4, w4*top_grad_value);
+  }
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  atomicAdd(grad_attn_weight, top_grad * val);
+  atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);
+  atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);
+}
+
+
+template <typename scalar_t>
+__global__ void ms_deformable_im2col_gpu_kernel(const int n,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *data_col)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    scalar_t *data_col_ptr = data_col + index;
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+    scalar_t col = 0;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride);
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col) * weight;
+        }
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+      }
+    }
+    *data_col_ptr = col;
+  }
+}
+
+template <typename scalar_t, unsigned int blockSize>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n,
+                                                const scalar_t *grad_col,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *grad_value,
+                                                scalar_t *grad_sampling_loc,
+                                                scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+    __shared__ scalar_t cache_grad_attn_weight[blockSize];
+    unsigned int tid = threadIdx.x;
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+        *(cache_grad_attn_weight+threadIdx.x)=0;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+        }
+
+        __syncthreads();
+        if (tid == 0)
+        {
+          scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];
+          int sid=2;
+          for (unsigned int tid = 1; tid < blockSize; ++tid)
+          {
+            _grad_w += cache_grad_sampling_loc[sid];
+            _grad_h += cache_grad_sampling_loc[sid + 1];
+            _grad_a += cache_grad_attn_weight[tid];
+            sid += 2;
+          }
+
+          *grad_sampling_loc = _grad_w;
+          *(grad_sampling_loc + 1) = _grad_h;
+          *grad_attn_weight = _grad_a;
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t, unsigned int blockSize>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n,
+                                                const scalar_t *grad_col,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *grad_value,
+                                                scalar_t *grad_sampling_loc,
+                                                scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+    __shared__ scalar_t cache_grad_attn_weight[blockSize];
+    unsigned int tid = threadIdx.x;
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+        *(cache_grad_attn_weight+threadIdx.x)=0;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+        }
+
+        __syncthreads();
+
+        for (unsigned int s=blockSize/2; s>0; s>>=1)
+        {
+          if (tid < s) {
+            const unsigned int xid1 = tid << 1;
+            const unsigned int xid2 = (tid + s) << 1;
+            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+            cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+          }
+          __syncthreads();
+        }
+
+        if (tid == 0)
+        {
+          *grad_sampling_loc = cache_grad_sampling_loc[0];
+          *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];
+          *grad_attn_weight = cache_grad_attn_weight[0];
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n,
+                                                const scalar_t *grad_col,
+                                                const scalar_t *data_value,
+                                                const int64_t *data_spatial_shapes,
+                                                const int64_t *data_level_start_index,
+                                                const scalar_t *data_sampling_loc,
+                                                const scalar_t *data_attn_weight,
+                                                const int batch_size,
+                                                const int spatial_size,
+                                                const int num_heads,
+                                                const int channels,
+                                                const int num_levels,
+                                                const int num_query,
+                                                const int num_point,
+                                                scalar_t *grad_value,
+                                                scalar_t *grad_sampling_loc,
+                                                scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    extern __shared__ int _s[];
+    scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;
+    scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+    unsigned int tid = threadIdx.x;
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+        *(cache_grad_attn_weight+threadIdx.x)=0;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+        }
+
+        __syncthreads();
+        if (tid == 0)
+        {
+          scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];
+          int sid=2;
+          for (unsigned int tid = 1; tid < blockDim.x; ++tid)
+          {
+            _grad_w += cache_grad_sampling_loc[sid];
+            _grad_h += cache_grad_sampling_loc[sid + 1];
+            _grad_a += cache_grad_attn_weight[tid];
+            sid += 2;
+          }
+
+          *grad_sampling_loc = _grad_w;
+          *(grad_sampling_loc + 1) = _grad_h;
+          *grad_attn_weight = _grad_a;
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+
} + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); 
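+        // Tree reduction: each pass halves the active thread count s, and the
+        // (s << 1) branch above folds in the odd leftover element whenever the
+        // previous active width spre is odd, so no partial sum is dropped.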
+ } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + 
cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+            if (tid + (s << 1) < spre)
+            {
+              cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)];
+              cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)];
+              cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
+            }
+          }
+          __syncthreads();
+        }
+
+        if (tid == 0)
+        {
+          atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]);
+          atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]);
+          atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]);
+        }
+        __syncthreads();
+
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_gm(const int n,
+                                                   const scalar_t *grad_col, const scalar_t *data_value,
+                                                   const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+                                                   const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+                                                   const int batch_size, const int spatial_size, const int num_heads,
+                                                   const int channels, const int num_levels, const int num_query,
+                                                   const int num_point,
+                                                   scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    int _temp = index;
+    const int c_col = _temp % channels;
+    _temp /= channels;
+    const int sampling_index = _temp;
+    const int m_col = _temp % num_heads;
+    _temp /= num_heads;
+    const int q_col = _temp % num_query;
+    _temp /= num_query;
+    const int b_col = _temp;
+
+    const scalar_t top_grad = grad_col[index];
+
+    int data_weight_ptr = sampling_index * num_levels * num_point;
+    int data_loc_w_ptr = data_weight_ptr << 1;
+    const int grad_sampling_ptr = data_weight_ptr;
+    grad_sampling_loc += grad_sampling_ptr << 1;
+    grad_attn_weight += grad_sampling_ptr;
+    const int grad_weight_stride = 1;
+    const int grad_loc_stride = 2;
+    const int qid_stride = num_heads * channels;
+    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+    for (int l_col=0; l_col < num_levels; ++l_col)
+    {
+      const int level_start_id = data_level_start_index[l_col];
+      const int spatial_h_ptr = l_col << 1;
+      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+      for (int p_col=0; p_col < num_point; ++p_col)
+      {
+        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+        const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+        const scalar_t h_im = loc_h * spatial_h - 0.5;
+        const scalar_t w_im = loc_w * spatial_w - 0.5;
+        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+        {
+          ms_deform_attn_col2im_bilinear_gm(
+            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+            top_grad, weight, grad_value_ptr,
+            grad_sampling_loc, grad_attn_weight);
+        }
+        data_weight_ptr += 1;
+        data_loc_w_ptr += 2;
+        grad_attn_weight += grad_weight_stride;
+        grad_sampling_loc += grad_loc_stride;
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+void ms_deformable_im2col_cuda(cudaStream_t stream,
+                               const scalar_t* data_value,
+                               const int64_t* data_spatial_shapes, const int64_t* data_level_start_index,
+                               const scalar_t* data_sampling_loc, const scalar_t* data_attn_weight,
+                               const int batch_size, const int spatial_size, const int num_heads,
+                               const int channels, const int num_levels, const int num_query, const int num_point,
+                               scalar_t* data_col)
+{
+  const int num_kernels = batch_size * num_query * num_heads * channels;
+  const int num_actual_kernels = batch_size * num_query * num_heads * channels;
+  const int num_threads = CUDA_NUM_THREADS;
+  ms_deformable_im2col_gpu_kernel<scalar_t>
+      <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+      num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+      batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col);
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+  }
+
+}
+
+template <typename scalar_t>
+void ms_deformable_col2im_cuda(cudaStream_t stream,
+                               const scalar_t* grad_col, const scalar_t* data_value,
+                               const int64_t * data_spatial_shapes, const int64_t * data_level_start_index,
+                               const scalar_t * data_sampling_loc, const scalar_t * data_attn_weight,
+                               const int batch_size, const int spatial_size, const int num_heads,
+                               const int channels, const int num_levels, const int num_query, const int num_point,
+                               scalar_t* grad_value, scalar_t* grad_sampling_loc, scalar_t* grad_attn_weight)
+{
+  const int num_threads = (channels > CUDA_NUM_THREADS) ? CUDA_NUM_THREADS : channels;
+  const int num_kernels = batch_size * num_query * num_heads * channels;
+  const int num_actual_kernels = batch_size * num_query * num_heads * channels;
+  if (channels > 1024)
+  {
+    if ((channels & 1023) == 0)
+    {
+      ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks<scalar_t>
+          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, num_threads*3*sizeof(scalar_t), stream>>>(
+          num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+          batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+          grad_value, grad_sampling_loc, grad_attn_weight);
+    }
+    else
+    {
+      ms_deformable_col2im_gpu_kernel_gm<scalar_t>
+          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+          num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+          batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+          grad_value, grad_sampling_loc, grad_attn_weight);
+    }
+  }
+  else{
+    switch(channels)
+    {
+      case 1:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 1>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 2:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 2>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 4:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 4>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 8:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 8>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 16:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 16>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 32:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 32>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 64:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 64>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 128:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 128>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 256:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 256>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 512:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 512>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      case 1024:
+        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 1024>
+            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0, stream>>>(
+            num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+            batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+            grad_value, grad_sampling_loc, grad_attn_weight);
+        break;
+      default:
+        if (channels < 64)
+        {
+          ms_deformable_col2im_gpu_kernel_shm_reduce_v1<scalar_t>
+              <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, num_threads*3*sizeof(scalar_t), stream>>>(
+              num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+              batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+              grad_value, grad_sampling_loc, grad_attn_weight);
+        }
+        else
+        {
+          ms_deformable_col2im_gpu_kernel_shm_reduce_v2<scalar_t>
+              <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, num_threads*3*sizeof(scalar_t), stream>>>(
+              num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+              batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+              grad_value, grad_sampling_loc, grad_attn_weight);
+        }
+    }
+  }
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+  }
+
+}
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/ops/src/ms_deform_attn.h b/yc2_univl/backup/pdvc/ops/src/ms_deform_attn.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac0ef2ec25f7d0ee51ca2d807b159ddf85652017
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/ms_deform_attn.h
@@ -0,0 +1,62 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+#pragma once
+
+#include "cpu/ms_deform_attn_cpu.h"
+
+#ifdef WITH_CUDA
+#include "cuda/ms_deform_attn_cuda.h"
+#endif
+
+
+at::Tensor
+ms_deform_attn_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    if (value.type().is_cuda())
+    {
+#ifdef WITH_CUDA
+        return ms_deform_attn_cuda_forward(
+            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
+#else
+        AT_ERROR("Not compiled with GPU support");
+#endif
+    }
+    AT_ERROR("Not implemented on the CPU");
+}
+
+std::vector<at::Tensor>
+ms_deform_attn_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    if (value.type().is_cuda())
+    {
+#ifdef WITH_CUDA
+        return ms_deform_attn_cuda_backward(
+            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
+#else
+        AT_ERROR("Not compiled with GPU support");
+#endif
+    }
+    AT_ERROR("Not implemented on the CPU");
+}
+
diff --git a/yc2_univl/backup/pdvc/ops/src/vision.cpp b/yc2_univl/backup/pdvc/ops/src/vision.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2201f63a51dca16d0b31148ed2c9e8e47ec15bdc
--- /dev/null
+++ b/yc2_univl/backup/pdvc/ops/src/vision.cpp
@@ -0,0 +1,16 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +#include "ms_deform_attn.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); + m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); +} diff --git a/yc2_univl/backup/pdvc/ops/test.py b/yc2_univl/backup/pdvc/ops/test.py new file mode 100644 index 0000000000000000000000000000000000000000..8dbf6d5547d131f01a8c5c28b76557bd27a9334b --- /dev/null +++ b/yc2_univl/backup/pdvc/ops/test.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import time +import torch +import torch.nn as nn +from torch.autograd import gradcheck + +from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +N, M, D = 1, 2, 2 +Lq, L, P = 2, 2, 2 +shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() +level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) +S = sum([(H*W).item() for H, W in shapes]) + + +torch.manual_seed(3) + + +@torch.no_grad() +def check_forward_equal_with_pytorch_double(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +@torch.no_grad() +def check_forward_equal_with_pytorch_float(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, 
attention_weights, im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): + + value = torch.rand(N, S, M, channels).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + func = MSDeformAttnFunction.apply + + value.requires_grad = grad_value + sampling_locations.requires_grad = grad_sampling_loc + attention_weights.requires_grad = grad_attn_weight + + gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) + + print(f'* {gradok} check_gradient_numerical(D={channels})') + + +if __name__ == '__main__': + check_forward_equal_with_pytorch_double() + check_forward_equal_with_pytorch_float() + + for channels in [30, 32, 64, 71, 1025, 2048, 3096]: + check_gradient_numerical(channels, True, True, True) + + + diff --git a/yc2_univl/backup/pdvc/pdvc.py b/yc2_univl/backup/pdvc/pdvc.py new file mode 100644 index 0000000000000000000000000000000000000000..4f7ffe3067b2a1382a79c0efc5a8ac828baa9c03 --- /dev/null +++ b/yc2_univl/backup/pdvc/pdvc.py @@ -0,0 +1,1305 @@ +# ------------------------------------------------------------------------ +# PDVC +# ------------------------------------------------------------------------ +# Modified from Deformable DETR(https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
+# ------------------------------------------------------------------------
+
+import json
+import torch
+import torch.nn.functional as F
+from torch import nn
+import math
+import time
+
+from misc.detr_utils import box_ops
+from misc.detr_utils.misc import (inverse_sigmoid)
+
+from .matcher import build_matcher
+
+from .deformable_transformer import build_deforamble_transformer
+from pdvc.CaptioningHead import build_captioner
+import copy
+from .criterion import AlignCriterion, SetCriterion, ContrastiveCriterion
+# from .rl_tool import init_scorer
+from misc.utils import decide_two_stage
+from .base_encoder import build_base_encoder
+# from .video_segmentation import segment_video_into_steps, alignment_to_boundary, to_center_duration, align_frame_into_steps
+from .video_segmentation import *
+# from transformers import AutoModel, BertConfig
+# from transformers.models.bert.modeling_bert import BertEncoder
+import numpy as np
+from itertools import chain
+# from .UniVL import load_pretrained_UniVL
+
+
+def _get_clones(module, N):
+    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+class PDVC(nn.Module):
+    """ This is the PDVC module that performs dense video captioning """
+
+    def __init__(self, base_encoder, transformer, captioner, num_classes, num_queries, num_feature_levels,
+                 aux_loss=True, with_box_refine=False, opt=None, translator=None):
+        """ Initializes the model.
+        Parameters:
+            transformer: torch module of the transformer architecture. See transformer.py
+            captioner: captioning head that generates a sentence for each event query
+            num_classes: number of foreground classes
+            num_queries: number of event queries. This is the maximal number of events
+                         PDVC can detect in a single video. For ActivityNet Captions, we recommend 10-30 queries.
+            aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
+ with_box_refine: iterative bounding box refinement + opt: all configs + """ + super().__init__() + self.opt = opt + self.base_encoder = base_encoder + self.transformer = transformer + self.caption_head = captioner + num_pred_text = 0 + + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # self.text_encoder = text_encoder + # text_encoder_hidden_dim = self.text_encoder.config.hidden_size + # num_pred_text += 1 + + hidden_dim = transformer.d_model + text_hidden_dim = opt.text_hidden_dim + + if self.opt.use_anchor: + # self.tgt_embed = nn.Embedding(num_queries, hidden_dim) + self.anchor_embed = nn.Embedding(num_queries, 2) # num_queries, 2 (center, duration) + self.query_embed = self.transformer.prepare_init_anchor_and_query(self.anchor_embed, hidden_dim, \ + random_anchor_init=True, prior_anchor_duration_init=True, \ + prior_duration=0.048) + self.query_embed = nn.Parameter(self.query_embed, requires_grad=True) + else: + self.query_embed = nn.Embedding(num_queries, hidden_dim * 2) + + self.class_head = nn.Linear(hidden_dim, num_classes) + self.class_refine_head = nn.Linear(hidden_dim, num_classes) # For refine pseudo box if use additional score layer + self.count_head = nn.Linear(hidden_dim, opt.max_eseq_length + 1) + self.bbox_head = MLP(hidden_dim, hidden_dim, 2, 3) + + self.num_feature_levels = num_feature_levels + self.aux_loss = aux_loss + self.with_box_refine = with_box_refine + self.share_caption_head = opt.share_caption_head + + # initialization + prior_prob = 0.01 + bias_value = -math.log((1 - prior_prob) / prior_prob) + self.class_head.bias.data = torch.ones(num_classes) * bias_value + self.class_refine_head.bias.data = torch.ones(num_classes) * bias_value + nn.init.constant_(self.bbox_head.layers[-1].weight.data, 0) + nn.init.constant_(self.bbox_head.layers[-1].bias.data, 0) + + if self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim' \ + or self.opt.use_pseudo_box: + self.load_text_embed = True + else: + self.load_text_embed = False + + + num_pred = transformer.decoder.num_layers + if self.share_caption_head: + print('all decoder layers share the same caption head') + self.caption_head = nn.ModuleList([self.caption_head for _ in range(num_pred)]) + else: + print('do NOT share the caption head') + self.caption_head = _get_clones(self.caption_head, num_pred) + + if self.opt.use_additional_cap_layer: + self.caption_head_refine = _get_clones(captioner, self.opt.refine_pseudo_stage_num) + + if with_box_refine: + self.class_head = _get_clones(self.class_head, num_pred) + self.count_head = _get_clones(self.count_head, num_pred) + self.bbox_head = _get_clones(self.bbox_head, num_pred) + nn.init.constant_(self.bbox_head[0].layers[-1].bias.data[1:], -2) + # hack implementation for iterative bounding box refinement + self.transformer.decoder.bbox_head = self.bbox_head + else: + nn.init.constant_(self.bbox_head.layers[-1].bias.data[1:], -2) + self.class_head = nn.ModuleList([self.class_head for _ in range(num_pred)]) + self.count_head = nn.ModuleList([self.count_head for _ in range(num_pred)]) + self.bbox_head = nn.ModuleList([self.bbox_head for _ in range(num_pred)]) + self.transformer.decoder.bbox_head = None + + self.class_refine_head = _get_clones(self.class_refine_head, self.opt.refine_pseudo_stage_num) + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + if opt.disable_contrastive_projection: + projection_event = nn.Identity() + projection_text = nn.Identity() + else: + projection_event = nn.Linear(hidden_dim, opt.contrastive_hidden_size) + projection_text = 
nn.Linear(text_hidden_dim, opt.contrastive_hidden_size) + self.contrastive_projection_event = nn.ModuleList( + [projection_event for _ in range(num_pred)]) + self.contrastive_projection_text = nn.ModuleList( + [projection_text for _ in range(num_pred)]) + if opt.enable_bg_for_cl: + self.background_embed = nn.Parameter(torch.randn(1, opt.contrastive_hidden_size), requires_grad=True) + else: + self.background_embed = None + + + self.translator = translator + + self.disable_mid_caption_heads = opt.disable_mid_caption_heads + if self.disable_mid_caption_heads: + print('only calculate caption loss in the last decoding layer') + + self.pseudo_boxes = {} + + + def get_filter_rule_for_encoder(self): + filter_rule = lambda x: 'input_proj' in x \ + or 'transformer.encoder' in x \ + or 'transformer.level_embed' in x \ + or 'base_encoder' in x + return filter_rule + + def encoder_decoder_parameters(self): + filter_rule = self.get_filter_rule_for_encoder() + enc_paras = [] + dec_paras = [] + for name, para in self.named_parameters(): + if filter_rule(name): + print('enc: {}'.format(name)) + enc_paras.append(para) + else: + print('dec: {}'.format(name)) + dec_paras.append(para) + return enc_paras, dec_paras + + # def text_encoding(self, text_encoder_input): + # ''' + # Produce the text embedding for each caption + # :param text_encoder_input: a dict of input for text encoder + # ''' + # if self.opt.pretrained_language_model == 'UniVL' or self.opt.use_pseudo_box: + # # breakpoint() + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input, output_all_encoded_layers=True)[0][-1] + # text_embed = text_embed.to(dtype=dtype) # num_sentence, num_word, dim + # attention_mask = text_encoder_input['attention_mask'].unsqueeze(-1).to(dtype=dtype) # num_sentence, num_word, 1 + # attention_mask[:,0,:] = 0. 
# This operation follows from the UniVL + # text_embed = text_embed * attention_mask # num_sentence, num_word, dim + # text_embed = text_embed.sum(dim=1) / attention_mask.sum(dim=1) # num_sentence, dim + # raw_text_embed = text_embed + # # if video_name: + # # text_feature_path = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text' + # # np.save('{}/{}.npy'.format(text_feature_path, video_name), text_embed.detach().cpu().numpy()) + # text_embed = self.contrastive_projection_text[-1](text_embed) + + # else: + # dtype = next(self.parameters()).dtype + # enable_grad = False + # use_amp = False + # with torch.cuda.amp.autocast(enabled=use_amp): + # with torch.set_grad_enabled(enable_grad): + # text_embed = self.text_encoder(**text_encoder_input) + # text_embed = text_embed['pooler_output'].to(dtype=dtype) # num_sentence, dim + # text_embed = self.contrastive_projection_text[-1](text_embed) # num_sentence, dim_contrastive_learning + # # TODO: add more paradigm to generate the text_embedding + + # return text_embed, raw_text_embed + + def forward(self, dt, criterion, contrastive_criterion, eval_mode=False): + transformer_input_type = self.opt.transformer_input_type + vf = dt['video_tensor'] # (N, L, C) + mask = ~ dt['video_mask'] # (N, L) + duration = dt['video_length'][:, 1] + video_name = dt['video_key'][0][2:] + # text_encoder_input = dt['text_encoder_input'] if (self.opt.matcher_type=='DTW' or self.opt.use_pseudo_box) else None + N, L, C = vf.shape + # assert N == 1, "batch size must be 1."s + + srcs, masks, pos = self.base_encoder(vf, mask, duration) + + src_flatten, temporal_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten = self.transformer.prepare_encoder_inputs( + srcs, masks, pos) + memory = self.transformer.forward_encoder(src_flatten, temporal_shapes, level_start_index, valid_ratios, + lvl_pos_embed_flatten, mask_flatten) + + two_stage, disable_iterative_refine, proposals, proposals_mask = decide_two_stage(transformer_input_type, + dt, criterion) + if two_stage: + if transformer_input_type == 'prior_proposals': + if self.opt.prior_manner == 'add': + #print('Insert the prior knowledge by adding the prior proposals to the query embed') + init_query_embed = self.query_embed.weight + _, tgt = torch.chunk(init_query_embed, 2, dim=1) + tgt = tgt.unsqueeze(0).expand(N, -1, -1) + init_reference, _, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_prior(proposals, num_queries = self.query_embed.weight.shape[0]) + proposals_mask = torch.ones(N, self.query_embed.weight.shape[0], device=query_embed.device).bool() + else: + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_proposal( + proposals) + else: + if self.opt.use_anchor: + # tgt = self.tgt_embed.weight + anchor = self.anchor_embed.weight # num_queries, 2 + query_anchor = (self.query_embed, anchor) + proposals_mask = torch.ones(N, self.query_embed.shape[0], device=self.query_embed.device).bool() + init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_anchor(memory, query_anchor) + else: + query_embed = self.query_embed.weight + proposals_mask = torch.ones(N, query_embed.shape[0], device=query_embed.device).bool() + 
init_reference, tgt, reference_points, query_embed = self.transformer.prepare_decoder_input_query(memory, + query_embed) + hs, inter_references = self.transformer.forward_decoder(tgt, reference_points, memory, temporal_shapes, + level_start_index, valid_ratios, query_embed, + mask_flatten, proposals_mask, disable_iterative_refine) + # hs: [num_decoder_layer, bs, num_query, feat_dim] + + # breakpoint() + # project to co-embedding space + if self.load_text_embed and eval_mode==False: + # text_embed, raw_text_embed = self.text_encoding(text_encoder_input) + # text_embed = [text_embed] * hs.shape[0] + # text_embed = torch.stack(text_embed, dim=0) + raw_text_embed = dt['cap_embed'] * hs.shape[0]# dt['caption_embedding'] returns a tuple(list) + # text_embed: [num_decoder_layer, num_sentence, contrastive_dim] + event_embed = torch.stack([self.contrastive_projection_event[i](hs_i) for i, hs_i in enumerate(hs)]) + text_embed = torch.stack([self.contrastive_projection_text[j](hs_j.cuda()) for j, hs_j in enumerate(raw_text_embed)]) + # breakpoint() + # event_embed: [num_decoder_layer, num_query, contrastive_dim] + else: + raw_text_embed = None + text_embed = None + event_embed = hs + # breakpoint() + if self.opt.use_pseudo_box and self.training: + # breakpoint() + # print('use pseudo box') + video_frame_num = dt['video_length'][:,0].cpu().numpy() # [feature_len, raw_video_len, video_len] + video_name = dt['video_key'][0] + if self.pseudo_boxes.get(video_name) is not None and 'box' in self.pseudo_boxes[video_name].keys() and 'loss' in self.pseudo_boxes[video_name].keys(): + # if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [self.pseudo_boxes[video_name]['box']] + + else: + if self.opt.pseudo_box_type == 'align': + video_step_segment = [segment_video_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + # elif self.opt.pseudo_box_type == 'similarity': + # video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + # breakpoint() + elif self.opt.pseudo_box_type == "similarity": + # breakpoint() + if self.opt.width_ratio < 0: + video_step_alignment = [align_frame_into_steps(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode) for i in range(N)] + else: + video_step_alignment = [align_frame_into_steps_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, mode=self.opt.statistic_mode, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=False, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order': + video_step_alignment = [align_frame_into_steps_op(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i 
in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v1': + video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_order_v2': + video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [align_frame_into_steps_op_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'weight_sim': + if self.opt.width_ratio < 0: + video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + else: + # breakpoint() + video_step_alignment = [step_retrieval_weight_sim_order(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + + elif self.opt.pseudo_box_type == 'weight_index': + video_step_alignment = [step_retrieval_weight_index(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size) for i in range(N)] + elif self.opt.pseudo_box_type == 'modeframe': + video_step_alignment = [align_frame_into_steps_mode(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ + topk=self.opt.top_frames, w=self.opt.window_size, ratio=self.opt.width_ratio) for i in range(N)] + elif self.opt.pseudo_box_type == 'uniform': + video_step_alignment = [uniform_box(dt['video_tensor'][i], raw_text_embed[i].to(memory.device)) for i in range(N)] + # breakpoint() + else: + raise NotImplementedError('pseudo_box_type {} is not implemented'.format(self.opt.pseudo_box_type)) + + + if self.opt.pseudo_box_type != 'align': + if self.opt.pseudo_box_type == 'similarity_op_order_v2' or self.opt.pseudo_box_type == 'similarity_op_v2': + # breakpoint() + video_step_alignment, loss_op = [out[0] for out in video_step_alignment], [out[1] for out in video_step_alignment] + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0], 'loss': loss_op[0].item()} + else: + self.pseudo_boxes[video_name] = {'box': video_step_alignment[0]} + + if self.opt.pseudo_box_type != 'align': + bbox_alignment = [(torch.tensor(video_step_alignment[i]) / video_frame_num).to(memory.device).to(torch.float32) for i in range(N)] + else: + bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # self.pseudo_boxes[video_name] = video_step_alignment[0] + # bbox_alignment = [torch.tensor(alignment_to_boundary(video_step_segment[i], video_frame_num)).to(memory.device) for i in range(N)] + + bbox_alignment = to_center_duration(bbox_alignment) + + + for sample in range(len(dt['video_target'])): + dt['video_target'][sample]['boxes_pseudo'] = bbox_alignment[sample] + # dt['video_target'][sample]['boxes'] = bbox_alignment[sample] + # else: + # print('use gt box') + + 
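        # Worked sketch of the pseudo-box normalization above (hypothetical
+        # numbers): a (start, end) frame alignment is divided by the frame count
+        # to land in [0, 1], then mapped to (center, duration) for the matcher,
+        # assuming to_center_duration maps (s, e) -> ((s + e) / 2, e - s):
+        # >>> align = torch.tensor([[10., 40.], [50., 90.]])  # frame indices
+        # >>> boxes = align / 100.0                           # video_frame_num = 100
+        # >>> torch.stack([boxes.mean(-1), boxes[:, 1] - boxes[:, 0]], dim=-1)
+        # tensor([[0.2500, 0.3000],
+        #         [0.7000, 0.4000]])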
#breakpoint() + others = {'memory': memory, + 'mask_flatten': mask_flatten, + 'spatial_shapes': temporal_shapes, + 'level_start_index': level_start_index, + 'valid_ratios': valid_ratios, + 'proposals_mask': proposals_mask, + 'text_embed': text_embed, + 'event_embed': event_embed} + # breakpoint() + if eval_mode or self.opt.caption_loss_coef == 0: + out, loss = self.parallel_prediction_full(dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + if self.opt.refine_pseudo_box and self.opt.use_pseudo_box: + # print('refine') + out, loss = self.parallel_prediction_refine_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + else: + # print('no refine') + out, loss = self.parallel_prediction_matched(dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type) + return out, loss + + def predict_event_num(self, counter, hs_lid): + hs_lid_pool = torch.max(hs_lid, dim=1, keepdim=False)[0] # [bs, feat_dim] + outputs_class0 = counter(hs_lid_pool) + return outputs_class0 + + def parallel_prediction_full(self, dt, criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + ''' + hs: [decoder_layer, bs, num_query, feat_dim] + init_reference: [bs, num_query, 1] + inter_references: [decoder_layer, bs, num_query, 2] + ''' + outputs_classes = [] + outputs_classes0 = [] + outputs_coords = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + num_pred = hs.shape[0] + #breakpoint() + for l_id in range(hs.shape[0]): + if l_id == 0: + reference = init_reference + else: + reference = inter_references[l_id - 1] # [decoder_layer, batch, query_num, ...] 
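+            # The count head applied just below (via predict_event_num) classifies
+            # the number of events from max-pooled query features; at inference the
+            # argmax is clamped so at least one event is kept. Illustrative values:
+            # >>> count_logits = torch.tensor([[0.1, 0.3, 2.0, 0.2]])
+            # >>> count_logits.argmax(dim=-1).clamp(min=1).item()
+            # 2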
+ hs_lid = hs[l_id] + outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + output_count = self.predict_event_num(self.count_head[l_id], hs_lid) + n_pred_sentence = output_count.argmax(dim=-1).clamp(min=1).item() + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 4] + + # if self.opt.disable_mid_caption_heads and (l_id != hs.shape[0] - 1): + if l_id != hs.shape[0] - 1: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, 'none') + else: + cap_probs, seq = self.caption_prediction_eval( + self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type) # Only output caption in the last decoding layer + + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if self.opt.matcher_type == 'DTW': + assert reference.shape[-1] == 2 and tmp.shape[-1] == 2 + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :2] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 2] + + outputs_classes.append(outputs_class) + outputs_classes0.append(output_count) + outputs_coords.append(outputs_coord) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + output_count = torch.stack(outputs_classes0) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + + all_out = {'pred_logits': outputs_class, + 'pred_count': output_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs} + out = {k: v[-1] for k, v in all_out.items()} + + if self.aux_loss: + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + + # loss, _, _ = criterion(out, dt['video_target'], others) + return out, [] + + def parallel_prediction_refine_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others, + disable_iterative_refine, transformer_input_type='queries'): + + outputs_classes = [] + outputs_counts = [] + outputs_coords = [] + outputs_cap_costs = [] + outputs_cap_losses = [] + outputs_cap_probs = [] + outputs_cap_seqs = [] + cl_match_mats = [] + + num_pred = hs.shape[0] + if self.opt.pseudo_box_aug: + assert self.opt.use_pseudo_box + num_sentence = dt['gt_boxes'].size(-2) + assert num_sentence == len(dt['cap_raw'][0]) + if self.opt.pseudo_box_aug_num * num_sentence > self.opt.num_queries: + aug_num = self.opt.num_queries // num_sentence + else: + aug_num = self.opt.pseudo_box_aug_num + if self.opt.refine_pseudo_box: + ori_dt_cap_tensor = copy.deepcopy(dt['cap_tensor']) + ori_dt_cap_mask = copy.deepcopy(dt['cap_mask']) + cap_dim = dt['cap_tensor'].shape[-1] #(num_sen, num_max_word) + dt['cap_tensor'] = dt['cap_tensor'].repeat(1, aug_num).reshape(-1, cap_dim) + dt['cap_mask'] = dt['cap_mask'].repeat(1, aug_num).reshape(-1, cap_dim) + + for l_id in range(num_pred): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[ + l_id - 1] # [decoder_layer, batch, query_num, ...] 
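+            # The cap_tensor repeat(1, aug_num).reshape(-1, cap_dim) earlier in this
+            # method duplicates each caption row aug_num times in order, so every
+            # augmented pseudo box keeps its source sentence. E.g. with aug_num = 2:
+            # >>> caps = torch.tensor([[1, 2], [3, 4]])
+            # >>> caps.repeat(1, 2).reshape(-1, 2)
+            # tensor([[1, 2],
+            #         [1, 2],
+            #         [3, 4],
+            #         [3, 4]])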
+ outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + outputs_count = torch.stack(outputs_counts) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + # outputs_cap_loss = torch.stack(outputs_cap_losses) + + all_out = { + 'pred_logits': outputs_class, + 'pred_count': outputs_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs, + 'cl_match_mats': cl_match_mats} + out = {k: v[-1] for k, v in all_out.items()} + + + # ============================= Refine pseudo box here ================================ + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + mil_dict = {} + bag_score_cache = [] + for stage in range(self.opt.refine_pseudo_stage_num): + # Decay augment ratio as the stage increases + aug_ratio = self.opt.pseudo_box_aug_ratio * (0.5 ** stage) + _, last_indices, aux_indices = criterion(out, dt['video_target'], others, aug_num, aug_ratio) + # Only use the last decoder layer output to conduct the pseudo box refinement + hs_lid = hs[-1] + reference = inter_references[-1] #[1, num_query, 2] + indices = last_indices[0] # [tensor(): num_matched_query ,tensor(): num_matched_cap] + query_indices = indices[0][0] # the indices of matched query is 
ordered
+            cap_indices = indices[0][1]  # the indices of matched sentences are unordered
+            # breakpoint()
+            # num_sentence = cap_indices.size(0) // self.opt.pseudo_box_aug_num
+            cap_sort = torch.sort(cap_indices)[1]
+            reorder_query_indices = query_indices[cap_sort]
+            if self.opt.use_neg_pseudo_box:
+                neg_query_indices = []
+                neg_cap_indices = torch.arange(0, cap_indices.size(0), aug_num).view(num_sentence, -1).repeat(1, self.opt.num_neg_box).view(-1)
+                for i in range(num_sentence):
+                    # select some negative indices from the reordered query indices
+                    candidates_r = (reorder_query_indices[(i + 1) * aug_num:])
+                    candidates_l = (reorder_query_indices[:(i) * aug_num])
+                    if (candidates_r.size(0) > 0) and (candidates_l.size(0) > 0):
+                        candidates = torch.cat((candidates_r, candidates_l))
+                    else:
+                        candidates = candidates_r if candidates_r.size(0) > 0 else candidates_l
+                    if candidates.size(0) == 0:
+                        candidates = reorder_query_indices
+                    if candidates.size(0) < self.opt.num_neg_box:
+                        random_selected_indices = torch.randperm(candidates.size(0))
+                        padding_num = self.opt.num_neg_box - candidates.size(0)
+                        random_selected_indices = torch.cat((random_selected_indices, random_selected_indices[:padding_num]))
+                    else:
+                        random_selected_indices = torch.randperm(reorder_query_indices.size(0) - aug_num)[:self.opt.num_neg_box]
+                    neg_query_indices.append(candidates[random_selected_indices])
+                neg_query_indices = torch.cat(neg_query_indices)
+                neg_indices = [(neg_query_indices, neg_cap_indices)]
+            # query_indices: ordered, cap_indices: unordered
+            # ++++++ <1>. Produce the instance score and classification score
+            if self.opt.use_additional_cap_layer:
+                cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference,
+                                                                                      others, self.opt.caption_decoder_type, indices)
+                if (stage > 0) and self.opt.use_neg_pseudo_box:
+                    _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head_refine[stage], dt, hs_lid, reference,
+                                                                    others, self.opt.caption_decoder_type, neg_indices)
+            else:
+                cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference,
+                                                                                      others, self.opt.caption_decoder_type, indices)
+                if (stage > 0) and self.opt.use_neg_pseudo_box:
+                    _, _, _, neg_cap_prob = self.caption_prediction(self.caption_head[-1], dt, hs_lid, reference,
+                                                                    others, self.opt.caption_decoder_type, neg_indices)
+            # breakpoint()
+            # sentence_cap_prob: the caption probability for each matched query, torch.Size([num_matched_query])
+            if self.opt.use_additional_score_layer:
+                query_ins_score = self.class_refine_head[stage](hs_lid)[:, query_indices, :]
+            else:
+                query_ins_score = outputs_classes[-1][:, query_indices, :]  # [1, num_matched_query, 1]
+            query_pred_boxes = outputs_coord[-1][:, query_indices, :]  # [1, num_matched_query, 2]
+            query_pred_boxes = query_pred_boxes[0, :, :][cap_sort].view(-1, 2)  # [num_matched_query, 2]
+            # breakpoint()
+            try:
+                query_ins_score = query_ins_score[0, cap_sort, 0].view(-1, aug_num)  # [num_cap, num_aug]
+            except:
+                breakpoint()
+            if self.opt.norm_ins_score == 'softmax':
+                query_ins_score = torch.softmax(query_ins_score, dim=-1)
+            elif self.opt.norm_ins_score == 'sigmoid':
+                query_ins_score = query_ins_score.sigmoid()
+            else:
+                raise NotImplementedError
+
+            # breakpoint()
+            # sentence_cap_score = cap_probs['cap_prob_train']
+            temperature = 2
+            sentence_cap_prob = sentence_cap_prob[cap_sort].view(-1, aug_num)  # [num_cap, num_aug]
+            cap_len = torch.tensor([len(cap.split()) for cap in dt['cap_raw'][0]],
+            cap_len = torch.tensor([len(cap.split()) for cap in dt['cap_raw'][0]], device=sentence_cap_prob.device).unsqueeze(1)
+            sentence_cap_score = (sentence_cap_prob / cap_len) ** temperature + 1e-5
+
+            sentence_cap_score[torch.isinf(sentence_cap_score)] = 1e8
+
+            sentence_cap_score = sentence_cap_score.detach()
+            query_ins_score = query_ins_score.detach()
+
+            query_score = sentence_cap_score + query_ins_score
+            # if (stage == 0) or (self.opt.focal_mil == False):
+            #     sentence_cap_prob = torch.softmax(sentence_cap_prob, dim=-1)  # Softmax over the queries in the same bag
+            # else:
+            #     sentence_cap_prob = sentence_cap_prob.sigmoid()
+
+            # if self.opt.cap_prob_clip:
+            #     query_score = sentence_cap_prob.detach() * query_ins_score  # [num_cap, num_aug]
+            # else:
+            #     query_score = sentence_cap_prob * query_ins_score  # [num_cap, num_aug]
+
+            # ++++++ <2>. Calculate the MIL loss and the negative loss
+            bag_score = query_score.sum(dim=-1)  # [num_cap]
+            bag_score = bag_score.clamp(0, 1)
+            bag_score_cache.append(bag_score)
+            mil_weight = bag_score_cache[stage - 1] if self.opt.weighted_mil_loss else torch.ones_like(bag_score).to(bag_score.device)
+            if stage > 0:
+                if self.opt.focal_mil:
+                    focal_weight = (torch.ones_like(bag_score).to(bag_score.device) - bag_score).pow(2)
+                    mil_loss = - focal_weight * (bag_score + 1e-6).log()
+                    mil_loss = (mil_weight * mil_loss).mean()
+                else:
+                    mil_loss = - (mil_weight * bag_score.log()).mean()
+                if self.opt.use_neg_pseudo_box:
+                    neg_cap_prob = neg_cap_prob.sigmoid()
+                    neg_loss = - (neg_cap_prob.pow(2) * (1 - neg_cap_prob).log()).view(num_sentence, -1).mean(dim=-1)
+                    neg_loss = (mil_weight * neg_loss).mean()
+                    mil_loss += neg_loss
+            else:
+                mil_loss = F.binary_cross_entropy(bag_score, torch.ones_like(bag_score).to(bag_score.device))
+            if 'loss_mil' in mil_dict.keys():
+                mil_dict['loss_mil'] += mil_loss
+            else:
+                mil_dict['loss_mil'] = mil_loss
+            # ++++++ <3>. Merge the pseudo boxes to generate a new pseudo box
+            if self.opt.merge_criterion == 'cap_topk':
+                topk_pseudo_scores, topk_pseudo_indices = torch.topk(sentence_cap_score, k=self.opt.merge_k_boxes, dim=-1)  # [num_caption, k]
+            elif self.opt.merge_criterion == 'ins_topk':
+                topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_ins_score, k=self.opt.merge_k_boxes, dim=-1)
+            elif self.opt.merge_criterion == 'ins_cap_topk':
+                topk_pseudo_scores, topk_pseudo_indices = torch.topk(query_score, k=self.opt.merge_k_boxes, dim=-1)  # [num_caption, k]
+            else:
+                raise NotImplementedError('merge_criterion {} is not implemented'.format(self.opt.merge_criterion))
+            topk_pseudo_scores = topk_pseudo_scores / (topk_pseudo_scores.sum(dim=-1, keepdim=True) + 1e-6)  # [num_caption, k]
+            weight = topk_pseudo_scores.unsqueeze(-1).repeat(1, 1, 2)  # [num_caption, k, 2]
+            for i in range(len(dt['video_target'])):
+                previous_pseudo_box = dt['video_target'][i]['box_pseudo_aug']  # [num_caption*num_aug, 2]
+                if self.opt.use_query_box_for_refine:
+                    # Use the query coordinates as part of the guidance for the refinement
+                    previous_pseudo_box = (previous_pseudo_box + query_pred_boxes) / 2
+                if self.opt.merge_mode == 'weighted_sum':
+                    # Merge the top-k boxes with a weighted sum
+                    selected_pseudo_box = torch.gather(previous_pseudo_box.view(-1, aug_num, 2), 1, topk_pseudo_indices.unsqueeze(-1).expand(-1, -1, previous_pseudo_box.size(-1)))  # [num_caption, k, 2]
+                    refined_pseudo_box = (weight * selected_pseudo_box).sum(dim=1).clamp(0, 1)  # [num_caption, 2]
+                    dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone()
+                    # ``targets_cp = copy.deepcopy(targets)`` in criterion.py raised
+                    # "RuntimeError: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment"
+                    # once the targets had been updated with the 'boxes_pseudo' key, so
+                    # refined_pseudo_box is detached here before being stored in the targets.
+                    # Commented by Huabin, 2023/9/14
+                elif self.opt.merge_mode == 'interpolate':
+                    # Generate the new box by linear interpolation between the previous pseudo box and the pseudo box with the max score
+                    max_pseudo_scores = topk_pseudo_scores[:, :1]
+                    max_coef = 0.5 * torch.ones_like(max_pseudo_scores).to(max_pseudo_scores.device)  # Upper bound on the coefficient for the box interpolation
+                    max_pseudo_box = torch.gather(previous_pseudo_box.view(-1, aug_num, 2), 1, topk_pseudo_indices[:, :1].unsqueeze(-1).expand(-1, -1, previous_pseudo_box.size(-1)))
+                    interpolate_coef = torch.min(max_pseudo_scores, max_coef)
+                    refined_pseudo_box = (1 - interpolate_coef) * previous_pseudo_box[(aug_num - 1)::aug_num, :] + interpolate_coef * max_pseudo_box.squeeze(1)
+                    refined_pseudo_box = refined_pseudo_box.clamp(0, 1)
+                    dt['video_target'][i]['boxes_pseudo'] = refined_pseudo_box.detach().clone()
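+            # Illustrative example of the weighted-sum merge above (made-up
+            # numbers): with merge_k_boxes=2, if a caption's normalized top-2
+            # scores are [0.75, 0.25] and its selected augmented (center, width)
+            # boxes are [0.40, 0.20] and [0.48, 0.28], the merged pseudo box is
+            # 0.75 * [0.40, 0.20] + 0.25 * [0.48, 0.28] = [0.42, 0.22].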
+        # ++++++ <4>. End of the refinement: restore dt['cap_tensor'] and dt['cap_mask'] to their original (un-repeated) values
+        dt['cap_tensor'] = ori_dt_cap_tensor
+        dt['cap_mask'] = ori_dt_cap_mask
+        mil_dict['loss_mil'] = mil_dict['loss_mil'] / self.opt.refine_pseudo_stage_num
+        criterion.pseudo_box_aug = False
+        # ================== End of refinement ========================================
+        if self.aux_loss:
+            ks, vs = list(zip(*(all_out.items())))
+            out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)]
+            loss, last_indices, aux_indices = criterion(out, dt['video_target'], others)
+            if self.opt.disable_rematch:
+                # Disable re-matching and directly use the max-score indices from the last refinement stage
+                selected_indices = query_score.argmax(dim=-1).unsqueeze(-1)
+                query_indices_in_refine = reorder_query_indices.to(selected_indices.device).view(-1, aug_num)
+                query_indices_in_refine = query_indices_in_refine.gather(1, selected_indices)
+                query_indices_in_refine, index_sort = torch.sort(query_indices_in_refine, 0)
+                cap_indices_in_refine = last_indices[0][0][1].sort()[0]
+                last_indices = [[(query_indices_in_refine.view(-1), cap_indices_in_refine[index_sort.view(-1)])], last_indices[1]]
+            loss.update(mil_dict)
+            criterion.pseudo_box_aug = True
+            for l_id in range(hs.shape[0]):
+                hs_lid = hs[l_id]
+                reference = init_reference if l_id == 0 else inter_references[l_id - 1]
+                indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0]
+                cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type, indices)
+                l_dict = {'loss_caption': cap_loss}
+                if l_id != hs.shape[0] - 1:
+                    l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()}
+                loss.update(l_dict)
+            out.update({'caption_probs': cap_probs, 'seq': seq})
+        else:
+            loss, last_indices = criterion(out, dt['video_target'], others)
+            criterion.pseudo_box_aug = True
+            l_id = hs.shape[0] - 1
+            reference = inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
+            hs_lid = hs[l_id]
+            indices = last_indices[0]
+            cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, others, self.opt.caption_decoder_type, indices)
+            l_dict = {'loss_caption': cap_loss}
+            loss.update(l_dict)
+
+            out.pop('caption_losses')
+            out.pop('caption_costs')
+            out.update({'caption_probs': cap_probs, 'seq': seq})
+
+        return out, loss
+
+    def parallel_prediction_matched(self, dt, criterion, contrastive_criterion, hs, init_reference, inter_references, others,
+                                    disable_iterative_refine, transformer_input_type='queries'):
+
+        outputs_classes = []
+        outputs_counts = []
+        outputs_coords = []
+        outputs_cap_costs = []
+        outputs_cap_losses = []
+        outputs_cap_probs = []
+        outputs_cap_seqs = []
+        cl_match_mats = []
+
+        num_pred = hs.shape[0]
+
+        if self.opt.pseudo_box_aug:
+            assert self.opt.use_pseudo_box
+            cap_dim = dt['cap_tensor'].shape[-1]  # (num_sen, num_max_word)
+            dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim)
+            dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim)
+
+        for l_id in range(num_pred):
+            hs_lid = hs[l_id]
+            reference = init_reference if l_id == 0 else inter_references[l_id - 1]  # [decoder_layer, batch, query_num, ...]
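+            # Each decoder layer has its own prediction heads; when iterative
+            # refinement is enabled, the box head output below is added to the
+            # inverse-sigmoid of this layer's reference points, so localization
+            # is refined layer by layer.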
+ outputs_class = self.class_head[l_id](hs_lid) # [bs, num_query, N_class] + outputs_count = self.predict_event_num(self.count_head[l_id], hs_lid) + tmp = self.bbox_head[l_id](hs_lid) # [bs, num_query, 2] + + + cost_caption, loss_caption, cap_probs, seq = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, + reference, others, 'none') + # if self.opt.use_anchor: + # outputs_coord = reference + # else: + if disable_iterative_refine: + outputs_coord = reference + else: + reference = inverse_sigmoid(reference) + if reference.shape[-1] == 2: + tmp += reference + else: + assert reference.shape[-1] == 1 + tmp[..., :1] += reference + outputs_coord = tmp.sigmoid() # [bs, num_query, 4] + + # Processing the text embed and event embed for alignment + if self.load_text_embed or not self.opt.disable_contrastive_projection: + assert others['text_embed'].shape[0] == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, others['text_embed'].shape[0]) + text_embed = others['text_embed'][l_id] # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + # event_embed = event_embed.reshape(-1, event_embed.shape[-1]) + # TODO: complete the contrastive learning to return the similarity matrices as 'cl_match_mat' + + + if self.opt.enable_contrastive and self.opt.set_cost_cl > 0: + assert len(others['text_embed']) == num_pred, \ + 'visual features have {} levels, but text have {}'.format(num_pred, len(others['text_embed'])) + text_embed = torch.cat(others['text_embed'][l_id], dim=0) # [num_sentence, contrastive_dim] + event_embed = others['event_embed'][l_id] + event_embed = event_embed.reshape(-1, event_embed.shape[-1]) # [num_query, contrastive_dim] + cl_match_mat = contrastive_criterion.forward_logits(text_embed, event_embed, self.background_embed).t() + # cl_match_mat: [num_query, num_sentence] + cl_match_mats.append(cl_match_mat) + else: + cl_match_mats.append(0) + + outputs_classes.append(outputs_class) + outputs_counts.append(outputs_count) + outputs_coords.append(outputs_coord) + # outputs_cap_losses.append(cap_loss) + outputs_cap_probs.append(cap_probs) + outputs_cap_seqs.append(seq) + + outputs_class = torch.stack(outputs_classes) # [decoder_layer, bs, num_query, N_class] + outputs_count = torch.stack(outputs_counts) + outputs_coord = torch.stack(outputs_coords) # [decoder_layer, bs, num_query, 4] + # outputs_cap_loss = torch.stack(outputs_cap_losses) + + all_out = { + 'pred_logits': outputs_class, + 'pred_count': outputs_count, + 'pred_boxes': outputs_coord, + 'caption_probs': outputs_cap_probs, + 'seq': outputs_cap_seqs, + 'cl_match_mats': cl_match_mats} + out = {k: v[-1] for k, v in all_out.items()} + + if self.aux_loss: + ks, vs = list(zip(*(all_out.items()))) + out['aux_outputs'] = [{ks[i]: vs[i][j] for i in range(len(ks))} for j in range(num_pred - 1)] + if transformer_input_type == 'prior_proposals': + loss, _, _ = criterion(out, dt['video_target']) + # Random select an query from each segment + num_sentence = dt['cap_tensor'].shape[0] + num_query = hs.shape[-2] + num_query_interval = num_query // num_sentence + query_indices = [] + for i in range(num_sentence): + interval_min = i * num_query_interval + interval_max = interval_min + num_query_interval + sample = torch.randint(interval_min, interval_max, (hs.shape[0],)) + query_indices.append(sample) + query_indices = torch.cat(query_indices, dim=0) + gt_indices = torch.arange(num_sentence) + + 
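+            # With prior proposals the matcher is bypassed: the queries are divided
+            # into num_sentence uniform intervals, one query is drawn per interval
+            # for every decoder layer, and each drawn query is paired directly
+            # with its sentence index below.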
last_indices = ([(query_indices[::hs.shape[0]], gt_indices)], [None, None]) + aux_indices = [] + for l_id in range(hs.shape[0]-1): + aux_indices.append(([(query_indices[(l_id+1)::hs.shape[0]], gt_indices)], [None, None])) + else: + loss, last_indices, aux_indices = criterion(out, dt['video_target'], others) + for l_id in range(hs.shape[0]): + hs_lid = hs[l_id] + reference = init_reference if l_id == 0 else inter_references[l_id - 1] + indices = last_indices[0] if l_id == hs.shape[0] - 1 else aux_indices[l_id][0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + + l_dict = {'loss_caption': cap_loss} + if (self.opt.matcher_type == 'DTW' or self.opt.matcher_type == 'Sim'): + contrastive_loss = contrastive_criterion( + text_embed = others['text_embed'][l_id], + event_embed = others['event_embed'][l_id], + matching_indices = indices, + bg_embed = self.background_embed, + ) + + l_dict.update({'contrastive_loss': contrastive_loss}) + if l_id != hs.shape[0] - 1: + l_dict = {k + f'_{l_id}': v for k, v in l_dict.items()} + loss.update(l_dict) + out.update({'caption_probs': cap_probs, 'seq': seq}) + else: + loss, last_indices = criterion(out, dt['video_target'], others) + + l_id = hs.shape[0] - 1 + reference = inter_references[l_id - 1] # [decoder_layer, batch, query_num, ...] + hs_lid = hs[l_id] + indices = last_indices[0] + cap_loss, cap_probs, seq, sentence_cap_prob = self.caption_prediction(self.caption_head[l_id], dt, hs_lid, reference, + others, self.opt.caption_decoder_type, indices) + l_dict = {'loss_caption': cap_loss} + loss.update(l_dict) + + out.pop('caption_losses') + out.pop('caption_costs') + out.update({'caption_probs': cap_probs, 'seq': seq}) + + return out, loss + + def caption_prediction(self, cap_head, dt, hs, reference, others, captioner_type, indices=None): + N_, N_q, C = hs.shape + # all_cap_num = len(dt['cap_tensor']) + # if self.opt.pseudo_box_aug: + # assert self.opt.use_pseudo_box + # cap_dim = dt['cap_tensor'].shape[-1] # (num_sen, num_max_word) + # # breakpoint() + # if indices != None: + # breakpoint() + # dt['cap_tensor'] = dt['cap_tensor'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + # dt['cap_mask'] = dt['cap_mask'].repeat(1, self.opt.pseudo_box_aug_num).reshape(-1, cap_dim) + all_cap_num = len(dt['cap_tensor']) + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + if indices == None: + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx=row_idx + query_nums[i] + col_idx= col_idx + gt_nums[i] + + bigids = mix_mask.nonzero(as_tuple=False) + feat_bigids, cap_bigids = bigids[:, 0], bigids[:, 1] + else: + # breakpoint() + feat_bigids = torch.zeros(sum([len(_[0]) for _ in indices])).long() + cap_bigids = torch.zeros_like(feat_bigids) + total_query_ids = 0 + total_cap_ids = 0 + total_ids = 0 + max_pair_num = max([len(_[0]) for _ in indices]) + new_hr_for_dsa = torch.zeros(N_, max_pair_num, C) # only for lstm-dsa + cap_seq = dt['cap_tensor'] + new_seq_for_dsa = torch.zeros(N_, max_pair_num, cap_seq.shape[-1], dtype=cap_seq.dtype) # only for lstm-dsa + for i, index in enumerate(indices): + feat_ids, cap_ids = index + 
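+                # Offset the per-sample indices into flat "big" ids: query ids are
+                # shifted by the number of queries seen so far, caption ids by the
+                # number of ground-truth captions seen so far.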
feat_bigids[total_ids: total_ids + len(feat_ids)] = total_query_ids + feat_ids + cap_bigids[total_ids: total_ids + len(feat_ids)] = total_cap_ids + cap_ids + new_hr_for_dsa[i, :len(feat_ids)] = hs[i, feat_ids] + new_seq_for_dsa[i, :len(feat_ids)] = cap_seq[total_cap_ids + cap_ids] + total_query_ids += query_nums[i] + total_cap_ids += gt_nums[i] + total_ids += len(feat_ids) + # if self.opt.pseudo_box_aug: + # # Revise the matched targer ids for pseudo box augmentation to caption id + # cap_bigids = cap_bigids // self.opt.pseudo_box_aug_num + cap_probs = {} + flag = True + + if captioner_type == 'none': + cost_caption = torch.zeros(N_, N_q, all_cap_num, + device=hs.device) # batch_size * num_queries * all_caption_num + loss_caption = torch.zeros(N_, N_q, all_cap_num, device=hs.device) + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cost_caption, loss_caption, cap_probs, seq + + elif captioner_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + elif self.opt.caption_decoder_type == 'standard': + # breakpoint() + # assert N_ == 1, 'only support batchsize = 1' + if self.training: + # breakpoint() + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + if self.opt.refine_pseudo_box: # Only training and refine_pseudo_box = True returns the raw_cap_prob + cap_prob, raw_cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # shape: [num_sentence, max_num_word, num_vocab] + # cap_prob is log_softmax(prob), raw_cap_prob is (prob) + cap_probs['cap_prob_train'] = cap_prob + cap_probs['raw_cap_prob'] = raw_cap_prob + else: + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, seq) + # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608], here 13 is the max length among 5 sentences + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + cap_prob = cap_head(hs[:, feat_bigids], reference[:, feat_bigids], others, + dt['cap_tensor'][cap_bigids]) + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + flag = False + pass + + if flag: + clip_ext = clip[feat_bigids] + clip_mask_ext = clip_mask[feat_bigids] + + if self.training: + seq = dt['cap_tensor'][cap_bigids] + if self.opt.caption_cost_type != 'rl': + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq) + cap_probs['cap_prob_train'] = cap_prob + else: + with torch.no_grad(): + seq_gt = dt['cap_tensor'][cap_bigids] + cap_prob = cap_head(event, clip_ext, clip_mask_ext, seq_gt) + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + + if len(seq): + # re_seq = torch.zeros(N_, N_q, seq.shape[-1]) + # re_cap_prob_eval = torch.zeros(N_, N_q, cap_prob_eval.shape[-1]) + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + if self.opt.caption_cost_type == 'loss': + cap_prob = cap_prob.reshape(-1, cap_prob.shape[-2], cap_prob.shape[-1]) # [num_matched_query, max_length_sentence, num_word_in_vocab], e.g., [5, 13, 1608] + caption_tensor = dt['cap_tensor'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + caption_mask = 
dt['cap_mask'][:, 1:][cap_bigids] # [num_sentence, max_num_sentence], e.g, [5, 13] + cap_loss = cap_head.build_loss(cap_prob, caption_tensor, caption_mask) # [num_query] + cap_cost = cap_loss + else: + raise AssertionError('caption cost type error') + + # Calculate caption probs for each query + # breakpoint() + # if self.opt.refine_pseudo_box: + # sentence_cap_prob = cap_head.build_prob(raw_cap_prob, caption_tensor, caption_mask) + # else: + sentence_cap_prob = - cap_loss + + if indices: + return cap_loss.mean(), cap_probs, seq, sentence_cap_prob + # cap_loss.mean(): [num_matched_query] --> [1], + # cap_probs: dict, contains 'cap_prob_train' or 'cap_prob_eval' [num_matched_query, max_length_sentence, num_word_in_vocab] + # seq: [num_sentence, max_length_sentence+1], here the '+1' means the 1st col is all '0' + + cap_id, query_id = cap_bigids, feat_bigids + cost_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + cost_caption[query_id, cap_id] = cap_cost + loss_caption = hs_r.new_zeros((max(query_id) + 1, max(cap_id) + 1)) + loss_caption[query_id, cap_id] = cap_loss + cost_caption = cost_caption.reshape(-1, N_q, + max(cap_id) + 1) # batch_size * num_queries * all_caption_num + loss_caption = loss_caption.reshape(-1, N_q, max(cap_id) + 1) + return cost_caption, loss_caption, cap_probs, seq + + def caption_prediction_eval(self, cap_head, dt, hs, reference, others, decoder_type, pred_num=None, indices=None): + assert indices == None + N_, N_q, C = hs.shape + query_mask = others['proposals_mask'] + gt_mask = dt['gt_boxes_mask'] + mix_mask = torch.zeros(query_mask.sum().item(), gt_mask.sum().item()) + query_nums, gt_nums = query_mask.sum(1).cpu(), gt_mask.sum(1).cpu() + hs_r = torch.masked_select(hs, query_mask.unsqueeze(-1)).reshape(-1, C) + + row_idx, col_idx = 0, 0 + for i in range(N_): + mix_mask[row_idx: (row_idx + query_nums[i]), col_idx: (col_idx + gt_nums[i])] = 1 + row_idx = row_idx + query_nums[i] + col_idx = col_idx + gt_nums[i] + + cap_probs = {} + + if decoder_type in ['none']: + cap_probs['cap_prob_train'] = torch.zeros(1, device=hs.device) + cap_probs['cap_prob_eval'] = torch.zeros(N_, N_q, 3, device=hs.device) + seq = torch.zeros(N_, N_q, 3, device=hs.device) + return cap_probs, seq + + elif decoder_type in ['light']: + clip = hs_r.unsqueeze(1) + clip_mask = clip.new_ones(clip.shape[:2]) + event = None + seq, cap_prob_eval = cap_head.sample(event, clip, clip_mask) + if len(seq): + seq = seq.reshape(-1, N_q, seq.shape[-1]) + cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + + elif decoder_type in ['standard']: + assert N_ == 1, 'only support batchsize = 1' + with torch.no_grad(): + if self.opt.transformer_input_type == 'prior_proposals': + # hs: [bs, num_query, feat_dim] + # reference: [bs, num_query, 2] + if pred_num: + num_cap = pred_num + else: + num_cap = dt['cap_tensor'].shape[0] + interval = N_q // num_cap + pool_layer = torch.nn.AvgPool1d(interval,stride=interval) + hs = pool_layer(hs.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # [batch, num_sentence, dim] + reference = pool_layer(reference.permute(0,2,1)).permute(0,2,1)[:,:num_cap,:] # # [batch, num_sentence, 2] + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): + seq = seq.reshape(-1, num_cap, seq.shape[-1]) # + cap_prob_eval = cap_prob_eval.reshape(-1, num_cap, cap_prob_eval.shape[-1]) + cap_probs['cap_prob_eval'] = cap_prob_eval + else: + seq, cap_prob_eval = cap_head.sample(hs, reference, others) + if len(seq): 
+                        seq = seq.reshape(-1, N_q, seq.shape[-1])
+                        cap_prob_eval = cap_prob_eval.reshape(-1, N_q, cap_prob_eval.shape[-1])
+                        cap_probs['cap_prob_eval'] = cap_prob_eval
+        return cap_probs, seq
+
+
+class PostProcess(nn.Module):
+    """ This module converts the model's output into the format expected by the coco api"""
+
+    def __init__(self, opt):
+        super().__init__()
+        self.opt = opt
+
+    @torch.no_grad()
+    def forward(self, outputs, target_sizes, loader):
+        """ Perform the computation
+        Parameters:
+            outputs: raw outputs of the model
+            target_sizes: tensor of dimension [batch_size] containing the size of each video of the batch
+        """
+        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']
+        N, N_q, N_class = out_logits.shape
+        assert len(out_logits) == len(target_sizes)
+        prob = out_logits.sigmoid()  # batch, num_queries, 1
+
+        if self.opt.transformer_input_type == 'prior_proposals':
+            # topk_values = prob.view(N, N_q)
+            # topk_indexes = torch.arange(N_q, device=prob.device).unsqueeze(0).repeat(N, 1)
+            topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), outputs['seq'].shape[1], dim=1)
+        else:
+            topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), N_q, dim=1)
+        scores = topk_values
+        # topk_boxes = topk_indexes // out_logits.shape[2]
+        topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode='floor')
+        labels = topk_indexes % out_logits.shape[2]
+        boxes = box_ops.box_cl_to_xy(out_bbox)
+        raw_boxes = copy.deepcopy(boxes)
+        boxes[boxes < 0] = 0
+        boxes[boxes > 1] = 1
+        boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 2))
+
+        scale_fct = torch.stack([target_sizes, target_sizes], dim=1)
+        boxes = boxes * scale_fct[:, None, :]
+        seq = outputs['seq']  # [batch_size, num_queries, max_cap_len=30]
+        cap_prob = outputs['caption_probs']['cap_prob_eval']  # [batch_size, num_queries]
+        eseq_lens = outputs['pred_count'].argmax(dim=-1).clamp(min=1)
+
+        if len(seq):
+            mask = (seq > 0).float()
+            # cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float') / (
+            #         1e-5 + mask.sum(2).cpu().numpy().astype('float'))
+            cap_scores = (mask * cap_prob).sum(2).cpu().numpy().astype('float')
+            seq = seq.detach().cpu().numpy().astype('int')  # (eseq_batch_size, eseq_len, cap_len)
+            caps = [[loader.dataset.translator.rtranslate(s) for s in s_vid] for s_vid in seq]
+            if self.opt.transformer_input_type != 'prior_proposals':
+                # Re-arrange the caption order according to the logits
+                caps = [[caps[batch][idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+                cap_scores = [[cap_scores[batch, idx] for q_id, idx in enumerate(b)] for batch, b in enumerate(topk_boxes)]
+        else:
+            bs, num_queries = boxes.shape[:2]
+            cap_scores = [[-1e5] * num_queries] * bs
+            caps = [[''] * num_queries] * bs
+
+        results = [
+            {'scores': s, 'labels': l, 'boxes': b, 'raw_boxes': rb, 'captions': c, 'caption_scores': cs, 'query_id': qid,
+             'vid_duration': ts, 'pred_seq_len': sl} for s, l, b, rb, c, cs, qid, ts, sl in
+            zip(scores, labels, boxes, raw_boxes, caps, cap_scores, topk_boxes, target_sizes, eseq_lens)]
+        return results
+
+
+class MLP(nn.Module):
+    """ Very simple multi-layer perceptron (also called FFN)"""
+
+    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+        super().__init__()
+        self.num_layers = num_layers
+        h = [hidden_dim] * (num_layers - 1)
+        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+
+    def forward(self, x):
+        for i, layer in enumerate(self.layers):
+            x =
F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + + +def build(args): + device = torch.device(args.device) + base_encoder = build_base_encoder(args) + # For text encoder when using DTW matcher + # if args.matcher_type == 'DTW' or args.use_pseudo_box: + # if args.pretrained_language_model == 'UniVL': + # print('Load pretrained UniVL model weights') + # text_encoder = load_pretrained_UniVL() + # else: + # for i in range(10): + # try: + # text_encoder = AutoModel.from_pretrained(args.pretrained_language_model, cache_dir=args.huggingface_cache_dir) + # break + # except: + # print('download error in AutoModel, retry...') + # time.sleep(1) + # else: + # text_encoder = None + + transformer = build_deforamble_transformer(args) + captioner = build_captioner(args) + + model = PDVC( + base_encoder, + transformer, + captioner, + num_classes=args.num_classes, + num_queries=args.num_queries, + num_feature_levels=args.num_feature_levels, + aux_loss=args.aux_loss, + with_box_refine=args.with_box_refine, + opt=args + ) + + matcher = build_matcher(args) + if args.matcher_type == 'DTW' and args.use_anchor: + weight_dict = {'loss_ce': args.cls_loss_coef, + 'loss_bbox': args.bbox_loss_coef, + 'loss_giou': args.giou_loss_coef, + 'loss_self_iou': args.self_iou_loss_coef, + 'loss_ref_rank': args.ref_rank_loss_coef, + 'loss_counter': args.count_loss_coef, + 'loss_caption': args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + else: + weight_dict = {'loss_ce': args.cls_loss_coef, + 'loss_bbox': args.bbox_loss_coef, + 'loss_giou': args.giou_loss_coef, + 'loss_counter': args.count_loss_coef, + 'loss_caption': args.caption_loss_coef, + 'contrastive_loss': args.contrastive_loss_start_coef, + } + if args.refine_pseudo_box: + weight_dict.update({'loss_mil': args.mil_loss_coef}) + # TODO this is a hack + if args.aux_loss: + aux_weight_dict = {} + for i in range(args.dec_layers - 1): + aux_weight_dict.update({k + f'_{i}': v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + + losses = ['labels', 'boxes', 'cardinality'] + + if args.matcher_type == 'DTW' or args.matcher_type == 'Sim': + criterion = AlignCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = ContrastiveCriterion(temperature=args.contrastive_loss_temperature, + enable_cross_video_cl=args.enable_cross_video_cl, + enable_e2t_cl = args.enable_e2t_cl, + enable_bg_for_cl = args.enable_bg_for_cl) + contrastive_criterion.to(device) + else: + criterion = SetCriterion(args.num_classes, matcher, weight_dict, losses, focal_alpha=args.focal_alpha, + focal_gamma=args.focal_gamma, opt=args) + contrastive_criterion = None + + criterion.to(device) + postprocessors = {'bbox': PostProcess(args)} + + return model, criterion, contrastive_criterion, postprocessors + + diff --git a/yc2_univl/backup/pdvc/position_encoding.py b/yc2_univl/backup/pdvc/position_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb71befd6e4397bd4d5a30c7a43861cea158cc7 --- /dev/null +++ b/yc2_univl/backup/pdvc/position_encoding.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Various positional encodings for the transformer. +""" +import math +import torch +from torch import nn + +from misc.detr_utils.misc import NestedTensor + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + self.max_duration = 256 + self.duration_embed_layer = nn.Linear(self.max_duration, self.max_duration) + + def forward(self, tensor_list: NestedTensor): + x = tensor_list.tensors + mask = tensor_list.mask + duration = tensor_list.duration + assert mask is not None + not_mask = ~mask + x_embed = not_mask.cumsum(1, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + x_embed = (x_embed - 0.5) / (x_embed[:, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + # dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + dim_t = self.temperature ** (2 * (torch.div(dim_t, 2, rounding_mode='floor')) / self.num_pos_feats) + pos_x = x_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + + dur_embed = self.duration_embedding(duration).reshape(-1,1,self.max_duration).expand_as(pos_x) + pos = torch.cat((pos_x, dur_embed), dim=2).permute(0, 2, 1) + return pos + + def duration_embedding(self, durations): + out = torch.zeros(len(durations), self.max_duration, device=durations.device) + durations = durations.int() + for ii in range(len(durations)): + out[ii, :durations[ii]] = 1 + out = self.duration_embed_layer(out) + return out + + + +def build_position_encoding(position_embedding, N_steps): + if position_embedding in ('v2', 'sine'): + # TODO find a better way of exposing other arguments + position_embedding = PositionEmbeddingSine(N_steps, normalize=True) + else: + raise ValueError(f"not supported {position_embedding}") + + return position_embedding diff --git a/yc2_univl/backup/pdvc/util.py b/yc2_univl/backup/pdvc/util.py new file mode 100644 index 0000000000000000000000000000000000000000..7e489c1bce356a96116e2c13fcabc1c84d132711 --- /dev/null +++ b/yc2_univl/backup/pdvc/util.py @@ -0,0 +1,72 @@ +import torch +import numpy as np + +# def find_center_index(array: np.ndarray) -> np.ndarray: +# """ +# Given a array with shape [steps, topk], find the center index between topk indexes +# which has the minimal average distance with other indexes. 
+
+#     Args:
+#     - array: numpy array representing the input array with shape [steps, topk]
+
+#     Returns:
+#     - center_indexes: numpy array of center indexes for each step
+#     """
+
+#     distances = np.sum(np.abs(array[:, np.newaxis, :] - array[:, :, np.newaxis]), axis=2)
+#     center_indexes = np.argmin(distances, axis=1)
+
+#     return center_indexes
+def find_center_value(arr):
+    # Compute pairwise distances between all values
+    distances = np.abs(arr[:, np.newaxis] - arr[np.newaxis, :])
+
+    # Sum the distances for each value
+    sum_distances = np.sum(distances, axis=1)
+
+    # Find the index of the value with the smallest summed distance
+    center_index = np.argmin(sum_distances)
+
+    # Get the center value
+    center_value = arr[center_index]
+
+    return center_value
+
+
+def compute_overlap(center_t, boundary_t, center_t_minus_1, boundary_t_minus_1):
+    """
+    Compute the overlap of the boundaries between time t and t-1 for each element in the arrays.
+
+    Args:
+    - center_t: numpy array representing the center at time t with shape [N,]
+    - boundary_t: numpy array representing the boundary at time t with shape [N, 1, candidates]
+    - center_t_minus_1: numpy array representing the center at time t-1 with shape [N,]
+    - boundary_t_minus_1: numpy array representing the boundary at time t-1 with shape [N, 1, 1]
+
+    Returns:
+    - overlap: numpy array representing the IoU overlap of the boundaries with shape [N, candidates]
+    """
+
+    boundary_t = boundary_t.squeeze(1)
+    boundary_t_minus_1 = boundary_t_minus_1.squeeze(1)
+    center_t = center_t[:, np.newaxis]
+    center_t_minus_1 = center_t_minus_1[:, np.newaxis]
+    # boundary_t_minus_1 = boundary_t_minus_1[:, np.newaxis]
+
+    # Calculate the start and end positions of the boundaries at time t and t-1
+    start_t = center_t - 0.5 * boundary_t
+    end_t = center_t + 0.5 * boundary_t
+    start_t_minus_1 = center_t_minus_1 - 0.5 * boundary_t_minus_1
+    end_t_minus_1 = center_t_minus_1 + 0.5 * boundary_t_minus_1
+
+    # Calculate the intersection and union of the boundaries
+    intersection = np.maximum(0, np.minimum(end_t, end_t_minus_1) - np.maximum(start_t, start_t_minus_1))
+    union = boundary_t + boundary_t_minus_1 - intersection
+
+    # Compute the overlap using the Intersection over Union (IoU) formula
+    overlap = intersection / union
+
+    return overlap
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/video_segmentation.py b/yc2_univl/backup/pdvc/video_segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfa7f74cf0a858fc7dc8a929638fc294fc9bfc13
--- /dev/null
+++ b/yc2_univl/backup/pdvc/video_segmentation.py
@@ -0,0 +1,976 @@
+import torch
+import numpy as np
+
+
+from pdvc.dp.exact_dp import drop_dtw, double_drop_dtw
+from pdvc.dp.dp_utils import compute_sim
+import statistics
+from sklearn.cluster import KMeans
+from pdvc.util import find_center_value, compute_overlap
+# from config import CONFIG
+
+''' configs of the original file '''
+config_eval_l2norm = True
+config_eval_keep_percentile = 0.48
+config_eval_fixed_drop_sim = -1
+
+
+'''
+Return value:
+frame features [num_frames, feature_dim] -> optimal_assignment [num_steps];
+-1 means no match, otherwise the index of the matched step/caption/query.
+'''
+# filter_threshold = 0.5
+
+def clip_array(arr, threshold):
+    clipped_arr = np.where(arr > threshold, arr, threshold)
+    return clipped_arr
+
+
+# def compute_filtered_indices(topk_indices_list, topk_values_list, scale=0.5):
+#     # center_indices = []
+#     # boundary_widths = []
+#     filtered_indices_list = []
+#     for topk_indices, topk_values in zip(topk_indices_list, topk_values_list):
+#         center_index = find_center_value(topk_indices)
+#         std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / sum(topk_values)) ** 0.5
+#         boundary_width = std_index * scale
+#         filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width]
+#         filtered_indices_list.append(filtered_indices)
+#         # center_indices.append(center_index)
+#         # boundary_widths.append(boundary_width)
+
+#     return filtered_indices_list
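+# The active compute_filtered_indices below keeps only the top-k frame indices
+# that lie within `threshold` similarity-weighted standard deviations of the
+# center index, e.g. with center 50 and a weighted std of 10, threshold=0.5
+# keeps indices in [45, 55]; the surviving min/max indices later form the box.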
+def compute_filtered_indices(topk_indices, topk_values, threshold=0.5):
+    center_index = find_center_value(np.array(topk_indices))
+    std_index = (sum((topk_indices - center_index) ** 2 * topk_values) / (sum(topk_values) + 1e-5)) ** 0.5
+    boundary_width = std_index * threshold
+    filtered_indices = [i for i in topk_indices if abs(i - center_index) <= boundary_width]
+    return filtered_indices
+
+def compute_bbox_loss(index_list, box, similarity_values):
+    left, right = box
+    distances = []
+
+    for i, index in enumerate(index_list):
+        if left <= index <= right:
+            distance = -min(index - left, right - index)
+        else:
+            distance = max(left - index, index - right)
+
+        weighted_distance = similarity_values[i] * distance
+        distances.append(weighted_distance)
+
+    return sum(distances)
+
+
+def remove_outliers(indices, threshold, mode, w):
+    # Calculate the center statistic of the indices according to `mode`
+    if mode == 'median':
+        median = statistics.median(indices)
+    elif mode == 'mode':
+        count_dict = {}
+        for p in range(min(indices), max(indices) + 1):
+            count = sum(1 for c in indices if p - w <= c <= p + w)
+            count_dict[p] = count
+
+        max_count = max(count_dict.values())
+        best_p_values = [p for p, count in count_dict.items() if count == max_count]
+        if len(best_p_values) % 2 == 0:
+            best_p_values.pop()
+
+        mode_value = statistics.median(best_p_values)
+
+    # The mean is needed for the standard deviation whatever the mode is
+    mean = sum(indices) / len(indices)
+    std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5
+
+    # Calculate the threshold for identifying outliers
+    threshold_value = threshold * std_dev
+
+    # Filter out indices that are far from the center
+    if mode == 'median':
+        filtered_indices = [i for i in indices if abs(i - median) <= threshold_value]
+    elif mode == 'mean':
+        filtered_indices = [i for i in indices if abs(i - mean) <= threshold_value]
+    elif mode == 'mode':
+        filtered_indices = [i for i in indices if abs(i - mode_value) <= threshold_value]
+    return filtered_indices
+
+
+def remove_outliers_v1(indices, threshold):
+    pass
+
+def get_mode(indices, w):
+    count_dict = {}
+    for p in range(min(indices), max(indices) + 1):
+        count = sum(1 for c in indices if p - w <= c <= p + w)
+        count_dict[p] = count
+
+    max_count = max(count_dict.values())
+    best_p_values = [p for p, count in count_dict.items() if count == max_count]
+    if len(best_p_values) % 2 == 0:
+        best_p_values.pop()
+
+    mode_value = statistics.median(best_p_values)
+    return mode_value
+
+def get_mode_box(sim, topk, w, ratio):  # topk is typically chosen as 20 and ratio as 1
+    ''' Note: the center is computed from only the top-k indices because they are more
+    trustworthy; once the center is fixed, the boundary search must use all of the
+    candidate indices. '''
+    avg_caption_length = sim.shape[1] // sim.shape[0]
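+    # Example of the mode-window center (illustrative numbers): with w=2 and top
+    # indices [3, 4, 5, 11, 12], positions 3-5 each cover {3, 4, 5} (count 3)
+    # while position 11 covers only {11, 12} (count 2), so the mode center is
+    # the median of {3, 4, 5} = 4; frames within `width` of it are kept below.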
+    sorted_idx = torch.argsort(-sim, dim=1)
+    top_indices = sorted_idx[:, :topk]
+    # top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True)
+    # top_indices_half = top_indices[:, :topk//2]
+    top_cap_indices = sorted_idx[:, :avg_caption_length]
+    width = int(ratio * avg_caption_length / 2)  # ratio is typically 1
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        mode_value = get_mode(top_indices[i].tolist(), w)
+        filtered_indices = [idx for idx in top_cap_indices[i].tolist() if abs(idx - mode_value) <= width]
+
+        # if len(filtered_indices) == 0:
+        #     filtered_indices = remove_outliers(sim[i].tolist(), top_indices[i].tolist(), 0.5, mode='median', w=w)
+        # if len(filtered_indices) == 0:
+        #     bbox.append([0, sim.shape[1] - 1])
+        #     continue
+        if len(filtered_indices) == 0:
+            bbox.append([mode_value - width, mode_value + width])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+    return bbox
+
+def compute_threshold(data, threshold):
+    mean = sum(data) / len(data)
+    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
+    threshold_value = threshold * std_dev
+    return threshold_value
+
+
+# Use the similarity as the weight to find the center
+''' Find the center globally, then find the boundary locally:
+    1. find the center, using the similarity as the weight;
+    2. find the boundary around that center. '''
+def step_retrieval_weight_sim(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # Similarity summed along a sliding window of size 2w+1
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+        flag = 1
+    else:
+        flag = 0
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+
+    # Find the frame with the maximum sum for each step
+    _, step_center_frames = window_sums.max(dim=1)
+    step_center_frames = step_center_frames.squeeze()
+
+    if flag == 1:
+        step_center_frames = step_center_frames.unsqueeze(0).tolist()
+    else:
+        step_center_frames = step_center_frames.tolist()
+
+    bbox = []
+    for i in range(top_indices.shape[0]):
+        threshold_value = compute_threshold(top_indices[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            bbox.append([step_center_frames[i] - w, step_center_frames[i] + w])
+        else:
+            bbox.append([w + min(filtered_indices), w + max(filtered_indices)])
+
+    return bbox
+
+''' TODO: get the right weight using the index '''
+def step_retrieval_weight_index(frame_features, step_features, topk=15, threshold=0.5, w=2):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_idx = torch.argsort(-similarity_matrix, dim=1)
+    # Similarity summed along a sliding window of size 2w+1
+    window_sums = torch.nn.functional.conv1d(similarity_matrix.unsqueeze(1), torch.ones(1, 1, 2 * w + 1)).squeeze()
+
+    top_values, top_indices = torch.topk(window_sums, topk, dim=1, largest=True, sorted=True)
+
+    # Find the frame with the maximum sum for each step
+    _,
step_center_frames = window_sums.max(dim=1) + step_center_frames = step_center_frames.squeeze().tolist() + + bbox = [] + for i in range(top_indices.shape[0]): + threshold_value = compute_threshold(top_indices[i].tolist(), threshold) + filtered_indices = [frame for frame in top_indices[i].tolist() if abs(frame - step_center_frames[i]) <= threshold_value] + bbox.append([w + min(filtered_indices), w + max(filtered_indices)]) + + return bbox + +def uniform_box(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'): + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + return uniform_boxes + + +def align_frame_into_steps(frame_features, step_features, topk=15, threshold=0.5, w=2, mode='median'): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + average_width = int(sim.shape[1] // sim.shape[0] / 2) + # frame_features, step_features = frame_features.cpu(), step_features.cpu() + # bbox = get_mode_box(sim, topk, w, ratio) + + top_values, top_indices = torch.topk(sim, topk, dim=1, largest=True, sorted=True) + bbox = [] + for i in range(top_indices.shape[0]): + filtered_indices = remove_outliers(top_indices[i].tolist(), threshold, mode=mode, w=w) + if len(filtered_indices) < 2: + filtered_indices = remove_outliers(top_indices[i].tolist(), 2*threshold, mode=mode, w=w) + if len(filtered_indices) == 0: + bbox.append([top_indices[0] - average_width, top_indices[0] + average_width]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + return bbox + +# use optimization to compute pseudo boundary +def align_frame_into_steps_op(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1): + # frame_features: torch.Size([200, 768]) + augment_ratio_list = np.arange(0.5, 2, 0.1) + + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + # breakpoint() + # [#step, #frame] + similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + + num_steps, num_frames = similarity_matrix.shape + + # Select top-k frames for each caption [#step, #topk] + sorted_indices = np.argsort(similarity_matrix, axis=1) + # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] + # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + + # Compute center indexes [#step, 1] + + + # Update boundary width + initial_boundary_width = num_frames / num_steps # 1 + # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 + # overlap = np.zeros(num_steps) + + for i in range(num_iterations): + if i == 0 and not order: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + # # overlap_weight = 0 + else: + if i == 0: + segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int) + start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:] + start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames) + 
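+                # Ordered initialization: split the video uniformly into num_steps
+                # segments and widen each segment by initial_boundary_width * scale
+                # on both sides before the in-window top-k search below.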
boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1) + + filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)] + if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + else: + boundary_width_last = boundary_width.reshape(-1, 1, 1) + start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames) + + topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # 
boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + +# use optimization to compute pseudo boundary +def align_frame_into_steps_op_v1(frame_features, step_features, topk=15, num_iterations=4, beta=1, order=False, scale=1): + # frame_features: torch.Size([200, 768]) + augment_ratio_list = np.arange(0.5, 2, 0.1) + + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + # breakpoint() + # [#step, #frame] + similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + + num_steps, num_frames = similarity_matrix.shape + + # Select top-k frames for each caption [#step, #topk] + sorted_indices = np.argsort(similarity_matrix, axis=1) + # top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] + # top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + + # Compute center indexes [#step, 1] + + + # Update boundary width + initial_boundary_width = num_frames / num_steps # 1 + # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 + # overlap = np.zeros(num_steps) + + for i in range(num_iterations): + if i == 0 and not order: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + # # overlap_weight = 0 + else: + if i == 0: + segment_boundary = np.linspace(0, num_frames, num_steps + 1).round().astype(int) + start_indices, end_indices = segment_boundary[:-1], segment_boundary[1:] + start_indices = np.clip(start_indices - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(end_indices + initial_boundary_width * scale, 0, num_frames) + boundary_width_last = (end_indices - start_indices).reshape(-1, 1, 1) + + filtered_indices = [sorted_indices[i][(sorted_indices[i] >= start_indices[i]) & (sorted_indices[i] <= end_indices[i])] for i in range(num_steps)] + if sum(len(index) for index in filtered_indices) < topk * num_steps * 0.4: + boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) + topk_indices = [index[-topk:] for index in sorted_indices] + topk_values = [similarity_matrix[i][index] for i, index in enumerate(topk_indices)] + + + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + previous_index_center = None + else: + boundary_width_last = boundary_width.reshape(-1, 1, 1) + start_indices = np.clip(center_indexes - boundary_width // 2 - initial_boundary_width * scale, 0, num_frames) + end_indices = np.clip(center_indexes + boundary_width // 2 + initial_boundary_width * scale, 0, num_frames) + + topk_indices = [] + topk_values = [] + for j, (start, end) in enumerate(zip(start_indices, end_indices)): + # breakpoint() + filtered_indices = sorted_indices[j][(sorted_indices[j] >= start) & (sorted_indices[j] <= end)] + topk_index = filtered_indices[-topk:] + topk_indices.append(topk_index) + 
topk_values.append(similarity_matrix[j][topk_index]) + previous_index_center = center_indexes.copy() if i > 0 else None + center_indexes = np.array([find_center_value(index) for index in topk_indices]) + + # top_indices = sorted_indices[:, ] + # previous_index_center = center_indexes + # # overlap_weight = 0.5 * np.sum(overlap) + + boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + # breakpoint() + + index_distance = [np.abs(index - center_indexes[i] + 1e-3)[:, np.newaxis] for i, index in enumerate(topk_indices)] # [[topk, 1]] + + weight_distance = [clip_array(index_distance[i], 0.5 * boundary_width_candidates[i]) for i in range(len(topk_indices))] # [[topk, 1]] + + loss_candidates_list = [value[:, np.newaxis] / weight_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + # loss_candidates_list = [value[:, np.newaxis] / index_distance[i] * (np.abs(index_distance[i] - 0.5 * boundary_width_candidates[i])) for i, value in enumerate(topk_values)] # [[topk, candidates]] + + + # index_distance = np.abs(topk_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + + # loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + loss_sim = np.array([np.mean(loss, axis=0) for loss in loss_candidates_list]) # [#step, #candidates] + + if i == 0: + loss = loss_sim + # print('loss shape:', loss_sim.shape, loss.shape) + else: + # measure the overlap between boundaries given center and boundary width + overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] + # breakpoint() + # print(loss_sim.shape, overlap.shape) + loss = loss_sim + beta * overlap + # print("ratio of overlap:", np.sum(overlap) / np.sum(loss_sim)) + # print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) + # find the best boundary width + # breakpoint() + best_boundary_width_index = np.argmin(loss, axis=1) # [#step] + + # Use broadcasting to create row indices corresponding to each row + # row_indices = np.arange(num_steps)[:, np.newaxis] + # breakpoint() + # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) + boundary_width = [boundary_width_candidates[i, 0][best_boundary_width_index[i]] for i in range(num_steps)] # [#step] + # boundary_width = boundary_width_candidates[:,0][row_indices, best_boundary_width_index] # [#step] + boundary_width = np.array(boundary_width) + # print(boundary_width.shape) + + bbox = [] + left_bound = np.clip(center_indexes - boundary_width // 2, 0, num_frames) + right_bound = np.clip(center_indexes + boundary_width // 2, 0, num_frames) + # breakpoint() + bbox = np.stack([left_bound, right_bound], axis=1).round().astype(int) + + return bbox.tolist() + + + + + +# # use optimization to compute pseudo boundary +# def align_frame_into_steps_op_order(frame_features, step_features, topk=15, threshold=0.5, num_iterations=4, beta=1): +# # frame_features: torch.Size([200, 768]) +# augment_ratio_list = np.arange(0.5, 2, 0.1) + +# if step_features.shape[0] == 0: +# return -np.ones(frame_features.shape[0]) + +# # breakpoint() +# # [#step, #frame] +# similarity_matrix = compute_sim(step_features, frame_features, config_eval_l2norm).cpu().numpy() + +# num_steps, num_frames = similarity_matrix.shape + +# # Select top-k frames for each caption [#step, #topk] +# 
top_indices = np.argsort(similarity_matrix, axis=1)[:, -topk:] +# top_values = np.take_along_axis(similarity_matrix, top_indices, axis=1) + +# # Compute center indexes [#step, 1] +# center_indexes = find_center_index(top_indices)[:, np.newaxis] + +# # Update boundary width +# initial_boundary_width = num_frames / num_steps # 1 +# # boundary_width = initial_boundary_width * np.ones(num_steps, 1, 1) # 1 +# # overlap = np.zeros(num_steps) + +# for i in range(num_iterations): +# if i == 0: +# boundary_width_last = np.full(num_steps, initial_boundary_width).reshape(-1, 1, 1) +# # previous_index_center = None +# # # overlap_weight = 0 +# else: +# boundary_width_last = boundary_width.reshape(-1, 1, 1) +# previous_index_center = center_indexes +# # overlap_weight = 0.5 * np.sum(overlap) + +# boundary_width_candidates = augment_ratio_list * boundary_width_last # [#steps, 1, #candidates] + +# index_distance = np.abs(top_indices - center_indexes)[:, :, np.newaxis] # [#step, #topk, 1] + +# loss_sim = np.sum(top_values[:, :, np.newaxis] / index_distance * (np.abs(index_distance - 0.5 * boundary_width_candidates)), axis=1) # [#step, #candidates] + +# if i == 0: +# loss = loss_sim # # [#step, #candidates] +# print('loss shape:', loss_sim.shape, loss.shape) +# else: +# # measure the overlap between boundaries given center and boundary width +# overlap = compute_overlap(center_indexes, boundary_width_candidates, previous_index_center, boundary_width_last) # [#step, #candidates] +# loss = loss_sim + beta * overlap +# print('loss shape:', loss_sim.shape, overlap.shape, loss.shape) +# # find the best boundary width +# # breakpoint() +# best_boundary_width = np.argmin(loss, axis=1) # [#step] +# # print(loss.shape, best_boundary_width.shape, boundary_width_candidates.shape) +# boundary_width = boundary_width_candidates[:,0][np.arange(num_steps), best_boundary_width] # [#step] +# # print(boundary_width.shape) + +# return center_indexes, boundary_width +# based on original code but change the method to compute center and std +def align_frame_into_steps_op_order_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + sorted_index = torch.argsort(-sim, dim=1) + top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])] + top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])] + + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + + iter_bbox_loss = {} + for iter in range(iteration): + if iter == 0: + refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio) + else: + refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio) # last bbox + + + # global: from all frames, local: from refined uniform boxes + + top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])] + + size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + if sum(size_local) < (topk-2) * len(size_local): + top_indices_list = top_indices_list_global + top_values_list = top_values_list_global + else: + top_indices_list = top_indices_list_local + 
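# the local pools were dense enough (at least topk-2 candidates per step on
+            # average), so keep the order-respecting, window-restricted top-k
+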
            top_values_list = top_values_list_local
+
+        # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+        bbox = []
+        for i in range(len(top_indices_list)):
+            filtered_indices = compute_filtered_indices(top_indices_list[i].tolist(), top_values_list[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+                if len(filtered_indices) == 0:
+                    bbox.append(uniform_boxes[i])
+                    continue
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+        # compute bbox loss
+        bbox_loss_list = [compute_bbox_loss(top_indices_list[i], bbox[i], top_values_list[i]) for i in range(len(top_indices_list))]
+        bbox_loss = sum(bbox_loss_list)
+        iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox}
+
+    # select the minimum bbox loss and bbox as output
+    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
+    min_loss = iter_bbox_loss[min_loss_iter]['loss']
+    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']
+
+
+    return (best_bbox, min_loss)
+
+def align_frame_into_steps_op_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    sorted_index = torch.argsort(-sim, dim=1)
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])]
+
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+
+    iter_bbox_loss = {}
+    for iter in range(iteration):
+        # if iter == 0:
+        #     refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+        # else:
+        #     refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio) # last bbox
+
+
+        # global: from all frames, local: from refined uniform boxes
+
+        # top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+        # top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])]
+
+        # size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+        # if sum(size_local) < (topk-2) * len(size_local):
+        #     top_indices_list = top_indices_list_global
+        #     top_values_list = top_values_list_global
+        # else:
+        #     top_indices_list = top_indices_list_local
+        #     top_values_list = top_values_list_local
+
+        # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+        bbox = []
+        for i in range(len(top_indices_list_global)):
+            filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+            if len(filtered_indices) == 0:
+                filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold)
+                if len(filtered_indices) == 0:
+                    bbox.append(uniform_boxes[i])
+                    continue
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+        # compute bbox loss
+        bbox_loss_list = [compute_bbox_loss(top_indices_list_global[i], bbox[i], top_values_list_global[i]) for i in range(len(top_indices_list_global))]
+        bbox_loss = sum(bbox_loss_list)
+        iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox}
+
+    # select the minimum bbox loss and bbox as output
+    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
+    min_loss = iter_bbox_loss[min_loss_iter]['loss']
+    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']
+
+
+    return (best_bbox, min_loss)
+
+
+
+# pseudo box 4: based on a fixed window; the results were poor, so it was abandoned
+def align_frame_into_steps_mode(frame_features, step_features, topk=15, w=2, ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+
+    bbox = get_mode_box(sim, topk, w, ratio)
+    return bbox
+
+def uniform_window(frame_num, step_num):
+    uniform_timestamps = torch.linspace(0, frame_num, step_num + 1)
+    uniform_timestamps = torch.round(uniform_timestamps).int().tolist()
+    bbox = []
+    for i in range(step_num):
+        bbox.append([uniform_timestamps[i], uniform_timestamps[i+1] - 1])
+
+    # window_size = frame_num // step_num
+    # bbox = []
+    # for i in range(step_num):
+    #     bbox.append([i * window_size, (i + 1) * window_size - 1])
+    # bbox[-1][1] = frame_num - 1
+    return bbox
+
+def expand_window(uniform_bbox, frame_num, step_num, ratio=1):
+    '''ratio: how far a gt box may drift from its uniform box; anything beyond this range is treated as impossible. The unit of ratio is the average length of one caption.'''
+    window_size = frame_num // step_num
+    refined_bbox = []
+    for bbox in uniform_bbox:
+        start = max(0, bbox[0] - ratio * window_size)
+        end = min(frame_num - 1, bbox[1] + ratio * window_size)
+        refined_bbox.append([start, end])
+    return refined_bbox
+
+# pseudo box 3: based on similarity, considering the order of steps
+def align_frame_into_steps_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1):
+    # breakpoint()
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    # old setting (index is wrong)
+    # # frame_features, step_features = frame_features.cpu(), step_features.cpu()
+    # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])]
+    # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list]
+    # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True)
+
+    sorted_index = torch.argsort(-sim, dim=1)
+    # global: from all frames, local: from refined uniform boxes
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk-2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in
range(len(top_indices_list)): + filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w) + if len(filtered_indices) == 0: + filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w) + if len(filtered_indices) == 0: + bbox.append(uniform_boxes[i]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + + return bbox + + + + +# based on pbox3, if ratio 1 has enough value, use it otherwise +def align_frame_into_steps_order_adapt(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, mode='median', ratio=1): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio) + + # old setting (index is wrong) + # # frame_features, step_features = frame_features.cpu(), step_features.cpu() + # index_sim_list = [sim[i][refined_uniform_boxes[i][0]: refined_uniform_boxes[i][1]] for i in range(sim.shape[0])] + # top_indices_list = [torch.topk(index_sim, k, dim=0, largest=True, sorted=True)[1] for index_sim in index_sim_list] + # # top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True) + + sorted_index = torch.argsort(-sim, dim=1) + # global: from all frames, local: from refined uniform boxes + top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])] + top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + + size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + if sum(size_local) < (topk-1) * len(size_local): + flag = 0 + for i in range(4): + refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio+i*0.5) + top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + if sum(size_local) >= (topk-1) * len(size_local): + flag = 1 + break + if flag == 0: + top_indices_list = top_indices_list_global + else: + top_indices_list = top_indices_list_local + + else: + top_indices_list = top_indices_list_local + + # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])] + + bbox = [] + for i in range(len(top_indices_list)): + filtered_indices = remove_outliers(top_indices_list[i].tolist(), threshold, mode=mode, w=w) + if len(filtered_indices) == 0: + filtered_indices = remove_outliers(top_indices_list_global[i].tolist(), 0.5, mode=mode, w=w) + if len(filtered_indices) == 0: + bbox.append(uniform_boxes[i]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + + return bbox + +def step_retrieval_weight_sim_order(frame_features, step_features, unordered=False, topk=15, threshold=2, w=2, ratio=1): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + # breakpoint() + + window_sums = torch.nn.functional.conv1d(sim.unsqueeze(1), torch.ones(1, 1, 2 
* w + 1)).squeeze()
+    if len(window_sums.shape) == 1:
+        window_sums = window_sums.unsqueeze(0)
+
+
+    sorted_index = torch.argsort(-window_sums, dim=1) + w
+
+
+
+    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])
+    refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio)
+
+    top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])]
+    top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])]
+
+
+    size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])]
+    if sum(size_local) < (topk-2) * len(size_local):
+        top_indices_list = top_indices_list_global
+    else:
+        top_indices_list = top_indices_list_local
+
+    # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])]
+
+    bbox = []
+    for i in range(len(top_indices_list)):
+        threshold_value = compute_threshold(top_indices_list[i].tolist(), threshold)
+        filtered_indices = [frame for frame in top_indices_list[i].tolist() if abs(frame - top_indices_list[i][0]) <= threshold_value]
+        if len(filtered_indices) == 0:
+            bbox.append([top_indices_list[i] - w, top_indices_list[i] + w])
+        else:
+            bbox.append([min(filtered_indices), max(filtered_indices)])
+
+    return bbox
+
+# pseudo box 0: based on dtw
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0] # making it of shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits # the baseline picks an intermediate value from the similarity matrix as the drop cost: the value at which matching and dropping are considered equally acceptable
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0) # directly find the frame that best matches each step, so in principle it is a one-to-one matching
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1 # tuning the drop cost adjusts how strict the matching is
+    return optimal_assignment
+
+def align_query_into_steps(query_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(query_features.shape[0])
+
+    sim = compute_sim(step_features, query_features, config_eval_l2norm).cpu()
+    query_features, step_features = query_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0] # making it of shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits # the baseline picks an intermediate value from the similarity matrix as the drop cost: the value at which matching and dropping are considered equally acceptable
+    zx_costs, drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]]
+    sim = sim.detach().cpu().numpy()
+
+    if unordered:
+        max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0) # directly find the frame that best matches each step, so in principle it is a one-to-one matching
+        optimal_assignment[max_vals < baseline_logit.item()] = -1
+    else:
+        optimal_assignment = drop_dtw(zx_costs, drop_costs, one_to_one=True, return_labels=True) - 1 # tuning the drop cost adjusts how strict the matching is
+    return optimal_assignment
+
+# matching between the video and the predicted slots at inference time
+def segment_video_into_slots(video_features, pred_steps):
+    sim = compute_sim(pred_steps, video_features, l2_norm=config_eval_l2norm).detach()
+    if config_eval_fixed_drop_sim == -1:
+        k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+        baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    else:
+        baseline_logit = torch.tensor(config_eval_fixed_drop_sim)
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]]) # making it of shape [1, N]
+    x_drop_costs = -baseline_logits.squeeze()
+    zx_costs = -sim
+
+    z_drop_costs = -baseline_logit.repeat([1, sim.shape[0]]).squeeze()
+    zx_costs = zx_costs - z_drop_costs[0].reshape([1, 1])
+    z_drop_costs = z_drop_costs - z_drop_costs[0]
+    x_drop_costs = x_drop_costs - x_drop_costs[0]
+    segmentation = double_drop_dtw(zx_costs.numpy(), x_drop_costs.numpy(), z_drop_costs.numpy(), return_labels=True) - 1
+    return segmentation
+
+
+# get_index and alignment_to_boundary are used for the 'align'-based manner
+def get_index(alignment):
+    start_idx, end_idx = [], []
+    for i in range(len(alignment)):
+        if alignment[i] == -1:
+            if i != 0 and alignment[i-1] != -1:
+                end_idx.append(i-1)
+            continue
+        if i == 0:
+            start_idx.append(i)
+        elif alignment[i] != alignment[i-1]:
+            start_idx.append(i)
+            if alignment[i-1] != -1:
+                end_idx.append(i-1)
+        if i == len(alignment) - 1:
+            end_idx.append(i)
+    assert len(start_idx) == len(end_idx)
+    for s, e in zip(start_idx, end_idx):
+        assert alignment[s] <= alignment[e]
+    return start_idx, end_idx
+
+def alignment_to_boundary(alignment, video_frame_num):
+    start_idx, end_idx = get_index(alignment)
+    start_time = start_idx / video_frame_num
+    end_time = end_idx / video_frame_num
+    boundaries = list(zip(start_time, end_time))
+
+    return np.float32(np.stack(boundaries, axis=0))
+
+
+def to_center_duration(alignments):
+    new_alignments = []
+    for alignment in alignments:
+        start, end = alignment[:, 0], alignment[:, 1]
+        center = (start + end) / 2
+        duration = end - start
+        alignment[:, 0], alignment[:, 1] = center, duration
+        new_alignments.append(alignment)
+    return new_alignments
\ No newline at end of file
diff --git a/yc2_univl/backup/pdvc/video_segmentation_ori.py b/yc2_univl/backup/pdvc/video_segmentation_ori.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d06e59f3b5a80fb4e8a765d20287175b03568d4
--- /dev/null
+++ b/yc2_univl/backup/pdvc/video_segmentation_ori.py
@@ -0,0 +1,127 @@
+import torch
+import numpy as np
+import statistics
+
+from pdvc.dp.exact_dp import drop_dtw
+from pdvc.dp.dp_utils import compute_sim
+import statistics
+from sklearn.cluster import KMeans
+
+
+config_eval_l2norm = True
+config_eval_keep_percentile = 0.48 # Calculated from the data
+config_eval_fixed_drop_sim = -1
+
+def segment_video_into_steps(frame_features, step_features, unordered=False):
+    if step_features.shape[0] == 0:
+        return -np.ones(frame_features.shape[0])
+
+    sim = compute_sim(step_features, frame_features, l2_norm=True).cpu()
+    frame_features, step_features = frame_features.cpu(), step_features.cpu()
+
+    k = max([1, int(torch.numel(sim) * config_eval_keep_percentile)])
+    baseline_logit = torch.topk(sim.reshape([-1]), k).values[-1].detach()
+    baseline_logits = baseline_logit.repeat([1, sim.shape[1]])[0] # making it of shape [N]
+    zx_costs, drop_costs = -sim, -baseline_logits
+    zx_costs,
drop_costs = [t.detach().cpu().numpy() for t in [zx_costs, drop_costs]] + sim = sim.detach().cpu().numpy() + + if unordered: + max_vals, optimal_assignment = np.max(sim, axis=0), np.argmax(sim, axis=0) + optimal_assignment[max_vals < baseline_logit.item()] = -1 + else: + optimal_assignment = drop_dtw(zx_costs, drop_costs, return_labels=True) - 1 + return optimal_assignment # [num_frames] + +def get_index(alignment): + start_idx, end_idx = [], [] + for i in range(len(alignment)): + if alignment[i] == -1: + if i != 0 and alignment[i-1] != -1: + end_idx.append(i-1) + continue + if i == 0: + start_idx.append(i) + elif alignment[i] != alignment[i-1]: + start_idx.append(i) + if alignment[i-1] != -1: + end_idx.append(i-1) + if i == len(alignment) - 1: + end_idx.append(i) + assert len(start_idx) == len(end_idx) + for s, e in zip(start_idx, end_idx): + assert alignment[s] <= alignment[e] + return start_idx, end_idx + +def get_index_update(alignment): + optimal_alignment = np.append(np.insert(alignment, 0, -1), -1) + diff_optimal_alignment = np.diff(optimal_alignment) + + optimal_alignment_end = optimal_alignment.copy() + optimal_alignment_end[optimal_alignment_end==-1] = max(optimal_alignment_end) + 1 + diff_optimal_alignment_end = np.diff(optimal_alignment_end) + + start_idx = np.where(diff_optimal_alignment>0)[0] + end_idx = np.where(diff_optimal_alignment_end>0)[0] - 1 + return start_idx, end_idx + +def alignment_to_boundary(alignment, video_frame_num): + start_idx, end_idx = get_index(alignment) + start_time = start_idx / video_frame_num + end_time = end_idx / video_frame_num + boundaries = list(zip(start_time, end_time)) + + return np.float32(np.stack(boundaries, axis=0)) + + +def to_center_duration(alignments): + new_alignments = [] + for alignment in alignments: + start, end = alignment[:, 0], alignment[:, 1] + center = (start + end) / 2 + duration = end - start + alignment[:, 0], alignment[:, 1] = center, duration + new_alignments.append(alignment) + return new_alignments + + +def remove_outliers(indices, threshold): + # Calculate the mean and standard deviation of the indices + median = statistics.median(indices) + mean = sum(indices) / len(indices) + std_dev = (sum((x - mean) ** 2 for x in indices) / len(indices)) ** 0.5 + + # Calculate the threshold for identifying outliers + threshold_value = threshold * std_dev + + # Filter out indices that are far from the mean + filtered_indices = [i for i in indices if abs(i - median) <= threshold_value] + + return filtered_indices + + +def align_frame_into_steps(frame_features, step_features, unordered=False, k=15, threshold=0.5): + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, True).cpu() + frame_features, step_features = frame_features.cpu(), step_features.cpu() + + top_values, top_indices = torch.topk(sim, k, dim=1, largest=True, sorted=True) + bbox = [] + for i in range(top_indices.shape[0]): + filtered_indices = remove_outliers(top_indices[i].tolist(), threshold) + bbox.append([min(filtered_indices), max(filtered_indices)]) + return bbox + +if __name__ == '__main__': + # frame_features = torch.randn(100, 768) + # text_features = torch.randn(8, 768) + # alignment = segment_video_into_steps(frame_features, text_features) + # breakpoint() + arr = [-1,-1,0,1,2,2,2,-1,-1,3,4,4,-1,-1,5,5,5,-1,6,6,7,-1,-1, 8, 8, 9] + start, end = get_index(arr) + start_1, end_1 = get_index_update(arr) + # start = [2, 3, 4, 8, 9, 13, 16, 18] + # end = [2, 3, 5, 8, 10, 15, 17, 18] + 
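# illustrative check (added, not in the original): the vectorized get_index_update
+    # should reproduce get_index on this example; the commented expected values above
+    # appear to come from an earlier test array
+    assert list(start_1) == list(start) and list(end_1) == list(end)
+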
breakpoint() diff --git a/yc2_univl/backup/test.py b/yc2_univl/backup/test.py new file mode 100644 index 0000000000000000000000000000000000000000..e1dcf9d7be821a3db142566cb23914ea96f1c064 --- /dev/null +++ b/yc2_univl/backup/test.py @@ -0,0 +1,64 @@ +# from pdvc.video_segmentation import align_frame_into_steps_op +# import torch + +# # create two tensors +# frame = torch.rand(200, 768) +# steps = torch.rand(10, 768) + +# bboxs = align_frame_into_steps_op(frame, steps, order=False) +# # breakpoint() +# print('done!') + + +# ================================================================== +# import json + +# filepath = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1/info.json" +# with open(filepath, 'r') as f: +# data = json.load(f) + +# val_history = data['history']['val_result_history'] + +# metric_sum = {} +# metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +# for k, v in val_history.items(): +# metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) +# print(f"{k}: {metric_sum[k]}") + +# best_epoch = max(metric_sum, key=metric_sum.get) +# print(val_history[best_epoch]['eval_score']) +# # write the val_history to a file +# with open('val.log', 'w') as f: +# for k, v in val_history[best_epoch]['eval_score'].items(): +# f.write(f"{k}: {v}\n") +# # print(metric_sum) +# # breakpoint() +# print('done!') + +# ================================================================== +import os +import json +import sys +sys.path.append('/mnt/data/Gvlab/wuhao/code/dibs') +from misc.utils import create_logger +save_folder = "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/yc2_ori_pbox(similarity_op_order)_CLIP/similarity_op_order_topf20_beta1_iter3_r1" + +val_logger = create_logger(save_folder, 'val.log') +infos_path = os.path.join(save_folder, 'info.json') + +with open(infos_path, 'r') as f: + data = json.load(f) +val_history = data['history']['val_result_history'] + +metric_sum = {} +metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] +for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + +best_epoch = max(metric_sum, key=metric_sum.get) +best_val_score = val_history[best_epoch]['eval_score'] +val_logger.info(f"Best epoch: {best_epoch}") +print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) +val_logger.info('\nBest Model Performance:\n' + print_info) +val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) \ No newline at end of file diff --git a/yc2_univl/backup/train.py b/yc2_univl/backup/train.py new file mode 100644 index 0000000000000000000000000000000000000000..43c0c73fd63d66eb7055f913723dd086ab80d288 --- /dev/null +++ b/yc2_univl/backup/train.py @@ -0,0 +1,671 @@ +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid 
warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file[0]: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + + + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = 
create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + # if use mixlm model + saved_path = construct_save_path(opt) + + if 'mixlm' in saved_path: + # text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature') + mixlm_pbox_path = construct_save_path(opt, save_folder='test').replace('.json', '').replace('test/', '') + text_feature_folder_mixlm = os.path.join('/mnt/data/Gvlab/wuhao/code/tmp', 'mix_text_feature', mixlm_pbox_path) + os.makedirs(text_feature_folder_mixlm, exist_ok=True) + if 'clip' in save_folder or 'CLIP' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj') + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip' + elif 'univl' in save_folder or 'UniVL' in save_folder or 'Uni' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text') + text_feature_folder_puyu = '/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu' + + if not os.path.exists(saved_path): + llama2_pbox_path = saved_path.replace('mixlm', 'llama2') + puyu_pbox_path = saved_path.replace('mixlm', 'puyu') + with open(llama2_pbox_path, 'r') as f: + llama2_pbox = json.load(f) + with open(puyu_pbox_path, 'r') as f: + puyu_pbox = json.load(f) + + mixlm_pbox = {} + for video_key in llama2_pbox.keys(): + if llama2_pbox.get(video_key) is None and puyu_pbox.get(video_key) is None: + mixlm_pbox[video_key] = None + elif llama2_pbox.get(video_key) is None: + mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + elif puyu_pbox.get(video_key) is None: + mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']: + mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + with open(saved_path, 'w') as f: + json.dump(mixlm_pbox, f) + + with open(saved_path, 'r') as f: + mixlm_pbox = json.load(f) + with open('data/howto/captiondata/howto100m_train_puyu.json', 'r') as f: + meta_puyu = json.load(f) + with open('data/howto/captiondata/howto100m_train.json', 'r') as f: + meta_llama2 = json.load(f) + + meta_mixlm = {} + for video_key in mixlm_pbox.keys(): + if mixlm_pbox.get(video_key) is not None and (meta_llama2.get(video_key) is not None or meta_puyu.get(video_key) is not None): + if mixlm_pbox[video_key]['llm'] == 'llama2': + meta_mixlm[video_key] = meta_llama2[video_key] + llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy') + if not os.path.exists(llama2_feature_path): + continue + # if os.path.exists(llama2_feature_path): + # os.unlink(llama2_feature_path) + # if not os.path.exists(llama2_feature_path): + # os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy') + # if os.path.exists(soft_link_path): + # os.unlink(soft_link_path) + if not os.path.exists(soft_link_path): + # print(os.path.exists(soft_link_path), os.path.exists(llama2_feature_path)) + os.symlink(llama2_feature_path, soft_link_path) + # 
text_feature = np.load(llama2_feature_path)
+                # if text_feature.shape[0] != len(meta_llama2[video_key]['sentences']):
+                #     print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_llama2[video_key]['sentences'])} sentences found in meta file")
+            else:
+                meta_mixlm[video_key] = meta_puyu[video_key]
+                puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy')
+                if not os.path.exists(puyu_feature_path):
+                    continue
+
+                soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy')
+
+                # if os.path.exists(soft_link_path):
+                #     os.unlink(soft_link_path)
+                if not os.path.exists(soft_link_path):
+                    os.symlink(puyu_feature_path, soft_link_path)
+                # text_feature = np.load(puyu_feature_path)
+                # if text_feature.shape[0] != len(meta_puyu[video_key]['sentences']):
+                #     print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_puyu[video_key]['sentences'])} sentences found in meta file")
+        with open(os.path.join(save_folder, 'train_caption_mixlm.json'), 'w') as f:
+            json.dump(meta_mixlm, f)
+        opt.train_caption_file[0] = os.path.join(save_folder, 'train_caption_mixlm.json')
+        opt.text_feature_folder[0] = text_feature_folder_mixlm
+        # pass
+
+
+    if not opt.start_from:
+        backup_envir(save_folder, opt)
+        logger.info('backup environment completed!')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    print(opt.text_feature_folder)
+    print(opt.train_caption_file)
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_1 = PropSeqDataset(opt.train_caption_file[0],
+                                         [opt.visual_feature_folder[0]],
+                                         [opt.text_feature_folder[0]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset_2 = PropSeqDataset(opt.train_caption_file[1],
+                                         [opt.visual_feature_folder[1]],
+                                         [opt.text_feature_folder[1]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        train_dataset.translator = train_dataset_1.translator
+
+    else:
+        train_dataset = PropSeqDataset(opt.train_caption_file,
+                                       opt.visual_feature_folder,
+                                       opt.text_feature_folder,
+                                       opt.dict_file, True, 'gt',
+                                       opt)
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size,
+                              shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads,
collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # try to load saved pbox + if os.path.exists(saved_path): + try: + with open(saved_path, 'r') as f: + model.pseudo_boxes = json.load(f) + except: + # delete the bad file + os.remove(saved_path) + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = 
AutoTokenizer.from_pretrained(opt.pretrained_language_model)
+    #         break
+    #     except:
+    #         print('download error in AutoTokenizer, retry...')
+    #         time.sleep(1)
+
+    if opt.start_from:
+        optimizer.load_state_dict(model_pth['optimizer'])
+        lr_scheduler.step(epoch-1)
+
+    # print the args for debugging
+    print_opt(opt, model, logger)
+    print_alert_message('Start training!', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+    # breakpoint()
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # Epoch-level iteration
+    refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box)
+    pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug)
+
+    while True:
+        # if epoch > opt.start_refine_epoch:
+        #     opt.refine_pseudo_box = refine_pseudo_box_copy
+        #     opt.pseudo_box_aug = pseudo_box_aug_copy
+        #     criterion.refine_pseudo_box = refine_pseudo_box_copy
+        #     criterion.pseudo_box_aug = pseudo_box_aug_copy
+        #     model.opt = opt
+        # else:
+        #     opt.refine_pseudo_box = False
+        #     opt.pseudo_box_aug = False
+        #     criterion.refine_pseudo_box = False
+        #     criterion.pseudo_box_aug = False
+        #     model.opt = opt
+
+        if True:
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
+            pass
+
+        # breakpoint()
+        # Batch-level iteration
+        trained_samples = 0
+        for dt in tqdm(train_loader, disable=opt.disable_tqdm):
+            # if dt['video_key'][0] != 'LGArj9Do0xc':
+            #     continue
+            # # for fast debugging
+            if opt.test:
+                if trained_samples > 5:
+                    break
+                else:
+                    trained_samples += 1
+            # if trained_samples < 1714:
+            #     trained_samples += 1
+            #     continue
+            if opt.device=='cuda':
+                torch.cuda.synchronize(opt.device)
+            if opt.debug:
+                # each epoch contains fewer mini-batches for debugging
+                if (iteration + 1) % 5 == 0:
+                    iteration += 1
+                    break
+            iteration += 1
+
+            optimizer.zero_grad()
+            dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+            dt['video_target'] = [
+                {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in
+                dt['video_target']]
+
+            # Add text encoder
+            # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+            #     captions = list()
+            #     for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+            #         captions.extend(video_sents)
+            #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+            #     text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()}
+            #     # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])}
+            #     # len(text_encoder_input['input_ids']) = n * max_text_input_len
+            #     dt['text_encoder_input'] = text_encoder_input
+
+            # dt = collections.defaultdict(lambda: None, dt) # Commented to
+            try:
+                output, loss = model(dt, criterion, contrastive_criterion)
+            except Exception as e:
+                print(e)
+                print(dt['video_key'])
+                continue
+            final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
+            # breakpoint()
+            final_loss.backward()
+
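# clip the global gradient norm to opt.grad_clip before the optimizer step;
+            # together with the try/except above, this guards against unstable batches
+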
torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + # breakpoint() + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + 
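+            # in the 'overall' criterion above, 2./(1./Precision + 1./Recall) is the
+            # harmonic mean (F1) of the proposal Precision and Recall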
+ # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + + if epoch == 1 and model.pseudo_boxes is not None and 'mixlm' not in opt.train_caption_file[0]: + # save the pseudo boxes + pbox_save_path = construct_save_path(opt) + if not os.path.exists(pbox_save_path): + with open(pbox_save_path, 'w') as f: + json.dump(model.pseudo_boxes, f) + + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # save the pesudo box + + + + # # ===============================old code============================================== + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + # =================================new code========================================================= + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] 
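+            # best-epoch selection at the end of training: an unweighted sum of the five
+            # metrics above, recomputed from the saved info.json history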
+ for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) + + break + + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + # breakpoint() + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_fewshot.py b/yc2_univl/backup/train_fewshot.py new file mode 100644 index 0000000000000000000000000000000000000000..d35b3feefc80f1a87e4fb30394702c28d04472d6 --- /dev/null +++ b/yc2_univl/backup/train_fewshot.py @@ -0,0 +1,482 @@ +# use ft_gt_percent to control the percentage of gt proposals used for finetuning + +# coding:utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', 
'/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def seed_worker(worker_id):
+    worker_seed = torch.initial_seed() % 2**32
+    np.random.seed(worker_seed)
+    random.seed(worker_seed)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+    # unmapped paths are only acceptable if they are already local (/mnt/data);
+    # otherwise abort (same rule as map_path in train.py)
+    if path == path_backup:
+        if path.startswith('/mnt/data'):
+            pass
+        else:
+            print('map failed')
+            exit(1)
+    return path
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_folder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder, opt)
+        logger.info('backup environment completed!')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_1 = PropSeqDataset(opt.train_caption_file[0],
+                                         [opt.visual_feature_folder[0]],
+                                         [opt.text_feature_folder[0]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset_2 = PropSeqDataset(opt.train_caption_file[1],
+                                         [opt.visual_feature_folder[1]],
+                                         [opt.text_feature_folder[1]],
+                                         opt.dict_file, True, 'gt',
+                                         opt)
+        train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        train_dataset.translator = train_dataset_1.translator
+
+    else:
+        train_dataset_target = PropSeqDataset(opt.train_caption_file,
+                                              opt.visual_feature_folder,
+                                              opt.text_feature_folder,
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_dataset = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size,
+                              shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration =
saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + 
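# --------------------------------------------------------------------------
# A minimal, self-contained sketch of the MultiStepLR schedule built above.
# The concrete numbers (decay_start=8, decay_every=3, total_epochs=20,
# gamma=0.5) are illustrative assumptions, not values from any cfg in this repo.
import torch

decay_start, decay_every, total_epochs, gamma = 8, 3, 20, 0.5
milestones = [decay_start + decay_every * i
              for i in range(int((total_epochs - decay_start) / decay_every))]
assert milestones == [8, 11, 14, 17]  # LR is multiplied by gamma at each one

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=gamma)
for epoch in range(total_epochs):
    optimizer.step()   # in real training, forward/backward happen here
    scheduler.step()   # after epoch 17 the LR has been halved four times
# --------------------------------------------------------------------------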
print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # if dt['video_key'][0] != 'LGArj9Do0xc': + # continue + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + # if trained_samples < 1714: + # trained_samples += 1 + # continue + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + 
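# --------------------------------------------------------------------------
# Sketch of the loss bookkeeping used just below: per-term sums accumulate
# every iteration and are averaged and flushed every `losses_log_every`
# iterations (a tenth of an epoch in the script). The loop and loss values
# here are stand-ins for real training iterations.
from collections import OrderedDict
import numpy as np

loss_sum = OrderedDict()
losses_log_every = 10  # the script uses int(len(train_loader) / 10)
for iteration in range(1, 31):
    loss = {'loss_ce': 0.7, 'loss_bbox': 0.3}  # stand-in batch losses
    for k, v in loss.items():
        loss_sum[k] = loss_sum.get(k, 0) + v
    if iteration % losses_log_every == 0:
        averaged = {k: np.round(v / losses_log_every, 3).item()
                    for k, v in loss_sum.items()}
        print(iteration, averaged)   # e.g. 10 {'loss_ce': 0.7, 'loss_bbox': 0.3}
        loss_sum = OrderedDict()     # reset the window, as the script does
# --------------------------------------------------------------------------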
if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + elif opt.criteria_for_best_ckpt == 'overall': + current_score = np.array(eval_score['Bleu_4']).mean() + \ + np.array(eval_score['CIDEr']).mean() + \ + np.array(eval_score['METEOR']).mean() + \ + 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': 
eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + # breakpoint() + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_ft.py b/yc2_univl/backup/train_ft.py new file mode 100644 index 0000000000000000000000000000000000000000..bdcc497f763607f28dfb1e0a687705c42e448a09 --- /dev/null +++ b/yc2_univl/backup/train_ft.py @@ -0,0 +1,513 @@ +# coding:utf-8 + +''' +train_seq2.py is different from train_seq.py in the following aspects: + +1. 
train_seq2.py uses the same dataset for pretraining and the target task +2. the pretrain dataset and the target dataset are not trained one after another within a single epoch: the pretrain dataset is trained for 10 epochs, then the target dataset for 20 epochs +3. the vocabulary is always the same for the pretrain and target tasks, i.e. the combined vocabulary of both +4. checkpoints are located in save as howto_yc2_* or howto_tasty_* +5. cfgs use howto-tasty_tasty_* or howto-yc2_yc2_* +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # map every known A100 prefix to its 3090 counterpart; fail loudly if nothing matched + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + if path == path_backup: + print('map failed') + exit(1) + return path + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only supports the howto-XXX_XXX settings') + exit(1) + + if not
os.path.exists(checkpoint_folder): + print('the checkpoint folder does not exist') + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, 
num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if 
opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + 
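# --------------------------------------------------------------------------
# A toy rendering of the weighted loss aggregation in the loop above: only
# terms present in criterion.weight_dict contribute to the backward pass;
# everything else is logged but never optimized. The tensors and weights
# below are made-up stand-ins.
import torch

weight_dict = {'loss_ce': 1.0, 'loss_bbox': 5.0}
loss = {'loss_ce': torch.tensor(0.7, requires_grad=True),
        'loss_bbox': torch.tensor(0.2, requires_grad=True),
        'loss_monitor_only': torch.tensor(9.9)}  # not in weight_dict

final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
assert abs(float(final_loss) - (0.7 * 1.0 + 0.2 * 5.0)) < 1e-6
final_loss.backward()  # gradients flow only through the weighted terms
# --------------------------------------------------------------------------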
logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': 
loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq2-ft' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_ft2_gt.py b/yc2_univl/backup/train_ft2_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..b767f5c2525ed10b6551ba02a5551bafe0f1737e --- /dev/null +++ b/yc2_univl/backup/train_ft2_gt.py @@ -0,0 +1,588 @@ +# coding:utf-8 + +''' +similar to train_ft_gt.py. it fine-tunes the model on the target dataset with ground-truth annotations. but the pretrain data includes both pretrain and target data (only use captions) + +set pretrain_data_mode to 'single', it is same as train_ft_gt.py. 
+ +Pretrain on the full howto subset data, then fine-tune on a portion of the gt data. +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + + + if path == path_backup: + if path.startswith('/mnt/data'): + pass + else: + # path = '/mnt' + path[6:] + print('map failed') + exit(1) + return path + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': +
checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'vlep-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('vlep-yc2_yc2', 'vlep_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder) + elif pretrain_data_mode == 'seq': + checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-anet_anet', 'howto_anet')) + + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + # breakpoint() + + if opt.id_ori != '': + checkpoint_folder = checkpoint_folder + '_' + opt.id_ori + # breakpoint() + # if opt.id == "": + # pass + # else: + # checkpoint_folder = checkpoint_folder + '_' + opt.id + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_es20'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder, opt) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + 
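# --------------------------------------------------------------------------
# Illustration of how the fine-tuning scripts recover the pretraining
# checkpoint folder from the current save-folder name for each
# pretrain_data_mode. The folder name below is a made-up example in the same
# naming style, not a real run.
import re

save_folder = 'save/howto-yc2_yc2_base_seq2-ft(mix)-gt_percent-0.1'
mix = re.sub(r"_seq2-ft.*", "", save_folder)
seq = re.sub(r"_seq2-ft.*", "_seq-train", save_folder)
single = re.sub(r"_seq2-ft.*", "",
                save_folder.replace('howto-yc2_yc2', 'howto_yc2'))
assert mix == 'save/howto-yc2_yc2_base'
assert seq == 'save/howto-yc2_yc2_base_seq-train'
assert single == 'save/howto_yc2_base'
# --------------------------------------------------------------------------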
train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + # print('the script only support two dataset for pretrain and target task respectively') + # exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + # train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not 
encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions 
= list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding 
reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = 
json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] + for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) + + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id_ori = opt.id + + + opt.id = 'seq2-ft({})-gt_percent-{}'.format(pretrain_data_mode, opt.ft_gt_percent) + if opt.id_ori != '': + opt.id = opt.id + '_' + opt.id_ori + assert opt.ft_gt_percent <= 1.0 and opt.ft_gt_percent >= 0.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_ft_gt.py b/yc2_univl/backup/train_ft_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..b481c6eb9a19299b401fbe8ce82d10716a846a7c --- /dev/null +++ b/yc2_univl/backup/train_ft_gt.py @@ -0,0 +1,516 @@ +# coding:utf-8 + +''' +train_seq2.py is different from train_seq.py in the following aspects: + +1. train_seq2.py uses the same dataset for pretraining and target task +2. the pretrain dataset and target dataset is not trained one after another in a single epoch. train pretrain dataset for 10 epochs then train target dataset for 20 epochs +3. the vocabulary is always the same for pretrain and target task i.e. combined vocabulary of pretrain and target task +4. checkpoint is located in save howto_yc2_* or howto_tasty_* +5. 
cfg use howto-tasty_tasty_* or howto-yc2_yc2_* +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_floder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + checkpoint_folder = re.sub(r"_seq2-ft.*", "", save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + 
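# The scan above walks suffixed re-runs (checkpoint_folder_1 ... _99) and keeps the first one + # whose val.log exists, i.e. the first fully trained run; reaching this branch means a suffix + # was missing before any trained run was found, so the script gives up below. +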
print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + 
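# The .get() calls above and below always fall back to their defaults (epoch 0, iteration 0, + # best_val_score -1e5) in this script, since saved_info is freshly initialised and the resume + # branch is commented out. +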
lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + 
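# NOTE (explanatory, values assumed): with learning_rate_decay_start=8, learning_rate_decay_every=3 + # and opt.epoch=20, milestone = [8, 11, 14, 17], and MultiStepLR multiplies the LR by + # learning_rate_decay_rate at each milestone. Also note that optimizer.load_state_dict() above + # restores the checkpoint's param-group LR, overriding the opt.lr * 0.5 set in training_params. +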
print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + 
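# Each logged value is the accumulated sum divided by losses_log_every + # (= int(len(train_loader_target) / 10), roughly ten log points per epoch), + # i.e. a windowed mean of the per-iteration losses; loss_sum is reset right after. +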
lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch 
>= opt.epoch: + # load the best checkpoint and run the final evaluation + print('====== Final Evaluation of the Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best model was saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score at iter {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq2-ft-gt_percent-{}'.format(opt.ft_gt_percent) + assert 0.0 <= opt.ft_gt_percent <= 1.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid the duplicate-OpenMP-runtime error on macOS + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_pre_ft_gt.py b/yc2_univl/backup/train_pre_ft_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..9440eb8b4b86d2123a997285686e704425519a3f --- /dev/null +++ b/yc2_univl/backup/train_pre_ft_gt.py @@ -0,0 +1,537 @@ +# coding:utf-8 + +''' +Similar to train_ft_gt.py: it fine-tunes the model on the target dataset with ground-truth annotations, but the pretraining data includes both the pretraining and the target data (captions only). + +Setting pretrain_data_mode to 'single' makes this script behave the same as train_ft_gt.py.
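+ +pretrain_data_mode options: 'mix' resumes from a checkpoint pretrained on both the pretraining and target captions, 'single' resumes from one pretrained on the pretraining data only (matching train_ft_gt.py), and 'seq' survives only as commented-out code below. + +Checkpoint selection: when no caption decoder is used, the validation score is the harmonic mean (F1) of localization Precision and Recall; e.g. P=0.6, R=0.3 gives 2/(1/0.6 + 1/0.3) = 0.4. + +The fine-tuning scripts subsample the target split with PercentageSubsetDataset (data/video_dataset.py). Its implementation is not part of this diff; the minimal sketch below shows the assumed behaviour (class body, index policy and attribute names are assumptions, not the actual code): + + class PercentageSubsetDataset(torch.utils.data.Dataset): + # expose only the leading `percent` fraction of `dataset` + def __init__(self, dataset, percent): + self.dataset = dataset + self.num_kept = int(len(dataset) * percent) + def __getitem__(self, i): + return self.dataset[i] + def __len__(self): + return self.num_kept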
+ + +''' +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath +import re + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def _init_fn(worker_id): + np.random.seed(12 + worker_id) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + opt.epoch = 20 + opt.use_pseudo_box = False + opt.refine_pseudo_box = False + opt.pseudo_box_aug = False + + # breakpoint() + if 'howto-tasty_tasty' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-train", save_folder) # .replace('_seq2-ft', '') + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-tasty_tasty', 'howto_tasty')) # .replace('_seq2-ft', '') + elif 'howto-yc2_yc2' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = 
re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-yc2_yc2', 'howto_yc2')) # .replace('_seq2-ft', '') + elif 'howto-anet_anet' in save_folder: + if pretrain_data_mode == 'mix': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder) + # elif pretrain_data_mode == 'seq': + # checkpoint_folder = re.sub(r"_seq2-ft.*", "_seq-train", save_folder) + elif pretrain_data_mode == 'single': + checkpoint_folder = re.sub(r"_seq2-pre.*", "_seq-pre_perc-{}".format(opt.pre_percent), save_folder.replace('howto-anet_anet', 'howto_anet')) + else: + print('the script only support settings howto-XXX_XXX') + exit(1) + + if not os.path.exists(checkpoint_folder) and not os.path.exists(checkpoint_folder + '_test'): + print('the checkpoint folder {} does not exist'.format(checkpoint_folder)) + exit(1) + else: + if not os.path.exists(os.path.join(checkpoint_folder, 'val.log')): + # print('the checkpoint folder has no val.log, denoting the setting is not fully trained') + for i in range(1, 100): + if os.path.exists(f'{checkpoint_folder}_{i}'): + if os.path.exists(os.path.join(f'{checkpoint_folder}_{i}', 'val.log')): + checkpoint_folder = f'{checkpoint_folder}_{i}' + break + else: + continue + else: + print(f'{checkpoint_folder}_{i} does not exist') + print('the checkpoint folder does not exist') + exit(1) + + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + if not opt.start_from: + backup_envir(save_folder, opt) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # # continue training + # if opt.start_from: + # opt.pretrain = False + # infos_path = os.path.join(save_folder, 'info.json') + # with open(infos_path) as f: + # logger.info('Load info from {}'.format(infos_path)) + # saved_info = json.load(f) + # prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + # exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + # for opt_name in prev_opt.keys(): + # if opt_name not in exclude_opt: + # vars(opt).update({opt_name: prev_opt.get(opt_name)}) + # if prev_opt.get(opt_name) != vars(opt).get(opt_name): + # logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + # vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + # train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0], + # [opt.visual_feature_folder[0]], + # [opt.text_feature_folder[0]], + # opt.dict_file, True, 'gt', + # opt) + train_dataset_target = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + # subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + # train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size, + # shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + # train_dataloaders = [train_loader_pretrain, train_loader_target] + # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2]) + # train_dataset.translator = train_dataset_1.translator + + else: + print('the script only support two dataset for pretrain and target task respectively') + exit(1) + train_dataset_target = 
PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + train_dataloaders = [train_loader_target] + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + # breakpoint() + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset_target.translator + model.train() + + + # load pretrained model + + # breakpoint() + # load pretrained model + model_pth = torch.load(os.path.join(checkpoint_folder, 'model-best.pth')) + logger.info('Loading pth from {}'.format(checkpoint_folder)) + model.load_state_dict(model_pth['model']) + + + # # Recover the parameters + # if opt.start_from and (not opt.pretrain): + # if opt.start_from_mode == 'best': + # model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + # elif opt.start_from_mode == 'last': + # model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + # logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + # model.load_state_dict(model_pth['model']) + + # # Load the pre-trained model + # if opt.pretrain and (not opt.start_from): + # logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + # model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # # query_weight = model_pth['model'].pop('query_embed.weight') + # if opt.pretrain == 'encoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + # model.load_state_dict(encoder_pth, strict=True) + # elif opt.pretrain == 'decoder': + # encoder_filter = model.get_filter_rule_for_encoder() + # decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + # model.load_state_dict(decoder_pth, strict=True) + # pass + # elif opt.pretrain == 'full': + # # model_pth = transfer(model, model_pth) + # model.load_state_dict(model_pth['model'], strict=True) + # else: + # raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = 
list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr * 0.5}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + # if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + # lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # breakpoint() + + # Epoch-level iteration + # opt.use_pseudo_box = False + + while True: + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + # for train_loader in train_dataloaders: + trained_samples = 0 + for dt in tqdm(train_loader_target, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # 
len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader_target) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = 
{'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + + opt.id = 'seq2-pre-{}-ft({})-gt'.format(opt.pre_percent, pretrain_data_mode) + assert opt.pre_percent <= 1.0 and opt.pre_percent >= 0.0 + + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in 
opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/backup/train_pre_perc.py b/yc2_univl/backup/train_pre_perc.py new file mode 100644 index 0000000000000000000000000000000000000000..15f50480e382fc5704c5a6e019594b9478bcca11 --- /dev/null +++ b/yc2_univl/backup/train_pre_perc.py @@ -0,0 +1,593 @@ +# coding:utf-8 +''' +cfgs is the same as train.py, but need add an extra argument: pre_percent +recommend value: 0.1, 0.2, 0.4, 0.6, 0.8, 1 +''' + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import time +import torch +import os +import sys +import collections +import numpy as np +from tqdm import tqdm +import torch.optim as optim +from torch.utils.data import DataLoader +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) +# print(sys.path) +CUDA_LAUNCH_BLOCKING=1 + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warning of tokenizer +from eval_utils import evaluate +import opts +from tensorboardX import SummaryWriter +from misc.utils import print_alert_message, build_folder, create_logger, backup_envir, print_opt, set_seed +from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset +from pdvc.pdvc import build +from collections import OrderedDict +from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup +import copy +import random +import numpy as np + +a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] +r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] + +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features +# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features + +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file[0]: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + 
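+ # single-caption-file HowTo100M case: tag the cache name with the LLM that produced the captions +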
if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +def map_path(path): + path_backup = copy.deepcopy(path) + # breakpoint() + for i, folder in enumerate(a100_folder): + if folder in path: + path = path.replace(folder, r3090_folder[i]) + return path + if path == path_backup: + print('map failed') + exit(1) + + +def train(opt): + set_seed(opt.seed) + save_folder = build_folder(opt) + logger = create_logger(save_folder, 'train.log') + tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + + # if use mixlm model + saved_path = construct_save_path(opt) + + if 'mixlm' in saved_path: + text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature') + os.makedirs(text_feature_folder_mixlm, exist_ok=True) + if 'clip' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj') + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip' + elif 'univl' in save_folder: + text_feature_folder_llama2 = '/mnt/data/Gvlab/wuhao/features/howto100m/univl_features' + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/features/howto100m/univl_features' + + if not os.path.exists(saved_path): + llama2_pbox_path = saved_path.replace('mixlm', 'llama2') + puyu_pbox_path = saved_path.replace('mixlm', 'puyu') + with open(llama2_pbox_path, 'r') as f: + llama2_pbox = json.load(f) + with open(puyu_pbox_path, 'r') as f: + puyu_pbox = json.load(f) + + mixlm_pbox = {} + for video_key in llama2_pbox.keys(): + if llama2_pbox[video_key] is None and puyu_pbox[video_key] is None: + mixlm_pbox[video_key] = None + else: + if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']: + mixlm_pbox[video_key] = {'pbox': llama2_pbox[video_key]['pbox'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + mixlm_pbox[video_key] = {'pbox': puyu_pbox[video_key]['pbox'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + with open(saved_path, 'w') as f: + json.dump(mixlm_pbox, f) + + with open(saved_path, 'r') as f: + mixlm_pbox = json.load(f) + for video_key in mixlm_pbox.keys(): + if mixlm_pbox[video_key] is not None: + if mixlm_pbox[video_key]['llm'] == 'llama2': + llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy') + os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + else: + puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy') + os.symlink(puyu_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + opt.text_feature_folder[0] = text_feature_folder_mixlm + + if 
not opt.start_from: + backup_envir(save_folder, opt) + logger.info('backup evironment completed !') + + saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} + + # continue training + if opt.start_from: + opt.pretrain = False + infos_path = os.path.join(save_folder, 'info.json') + with open(infos_path) as f: + logger.info('Load info from {}'.format(infos_path)) + saved_info = json.load(f) + prev_opt = saved_info[opt.start_from_mode[:4]]['opt'] + + exclude_opt = ['start_from', 'start_from_mode', 'pretrain'] + for opt_name in prev_opt.keys(): + if opt_name not in exclude_opt: + vars(opt).update({opt_name: prev_opt.get(opt_name)}) + if prev_opt.get(opt_name) != vars(opt).get(opt_name): + logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), + vars(opt).get(opt_name))) + if len(opt.visual_feature_folder) == 2: + train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], + [opt.visual_feature_folder[0]], + [opt.text_feature_folder[0]], + opt.dict_file, True, 'gt', + opt) + train_dataset_subdata = PercentageSubsetDataset(train_dataset_1, opt.pre_percent) + train_dataset_2 = PropSeqDataset(opt.train_caption_file[1], + [opt.visual_feature_folder[1]], + [opt.text_feature_folder[1]], + opt.dict_file, True, 'gt', + opt) + train_dataset = torch.utils.data.ConcatDataset([train_dataset_subdata, train_dataset_2]) + train_dataset.translator = train_dataset_1.translator + + else: + train_dataset_all = PropSeqDataset(opt.train_caption_file, + opt.visual_feature_folder, + opt.text_feature_folder, + opt.dict_file, True, 'gt', + opt) + train_dataset = PercentageSubsetDataset(train_dataset_all, opt.pre_percent) + + # val_dataset = PropSeqDataset(opt.val_caption_file, + # opt.visual_feature_folder, + # opt.text_feature_folder, + # opt.dict_file, False, 'gt', + # opt) + if not hasattr(opt, 'dict_file_val'): + opt.dict_file_val = opt.dict_file + opt.vocab_size_val = opt.vocab_size + + val_dataset = PropSeqDataset(opt.val_caption_file, + opt.visual_feature_folder_val, + opt.text_feature_folder_val, + opt.dict_file, False, 'gt', + opt) + g = torch.Generator() + g.manual_seed(0) + + train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, + shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval, + shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g) + + epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0) + iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0) + best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5) + val_result_history = saved_info['history'].get('val_result_history', {}) + loss_history = saved_info['history'].get('loss_history', {}) + lr_history = saved_info['history'].get('lr_history', {}) + opt.current_lr = vars(opt).get('current_lr', opt.lr) + + # Build model + + model, criterion, contrastive_criterion, postprocessors = build(opt) + model.translator = train_dataset.translator + model.train() + + # Recover the parameters + if opt.start_from and (not opt.pretrain): + if opt.start_from_mode == 'best': + model_pth = torch.load(os.path.join(save_folder, 'model-best.pth')) + elif opt.start_from_mode == 'last': + model_pth = torch.load(os.path.join(save_folder, 'model-last.pth')) + logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration)) + model.load_state_dict(model_pth['model']) + + # Load the pre-trained 
model + if opt.pretrain and (not opt.start_from): + logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path)) + model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device)) + # query_weight = model_pth['model'].pop('query_embed.weight') + if opt.pretrain == 'encoder': + encoder_filter = model.get_filter_rule_for_encoder() + encoder_pth = {k:v for k,v in model_pth['model'].items() if encoder_filter(k)} + model.load_state_dict(encoder_pth, strict=True) + elif opt.pretrain == 'decoder': + encoder_filter = model.get_filter_rule_for_encoder() + decoder_pth = {k:v for k,v in model_pth['model'].items() if not encoder_filter(k)} + model.load_state_dict(decoder_pth, strict=True) + pass + elif opt.pretrain == 'full': + # model_pth = transfer(model, model_pth) + model.load_state_dict(model_pth['model'], strict=True) + else: + raise ValueError("wrong value of opt.pretrain") + + + model.to(opt.device) + + # Decide which parameters need to be trained + # if (opt.matcher_type =='DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen': + # for _, p in model.text_encoder.named_parameters(): + # p.requires_grad = False + # text_encoder_params = list(map(id, model.text_encoder.parameters())) + # other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters()) + # else: + # other_params = model.parameters() + other_params = model.parameters() + + training_params = [{'params': other_params, 'lr': opt.lr}] + + if opt.optimizer_type == 'adam': + optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay) + + elif opt.optimizer_type == 'adamw': + optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay) + + milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))] + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate) + + # Load tokenizer for text encoder + # for i in range(10): + # try: + # if opt.pretrained_language_model == 'UniVL': + # tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') + # else: + # tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model) + # break + # except: + # print('download error in AutoTokenizer, retry...') + # time.sleep(1) + + if opt.start_from: + optimizer.load_state_dict(model_pth['optimizer']) + lr_scheduler.step(epoch-1) + + # print the args for debugging + print_opt(opt, model, logger) + print_alert_message('Strat training !', logger) + + loss_sum = OrderedDict() + bad_video_num = 0 + + start = time.time() + # breakpoint() + weight_dict = criterion.weight_dict + logger.info('loss type: {}'.format(weight_dict.keys())) + logger.info('loss weights: {}'.format(weight_dict.values())) + + # Epoch-level iteration + refine_pseudo_box_copy = copy.deepcopy(opt.refine_pseudo_box) + pseudo_box_aug_copy = copy.deepcopy(opt.pseudo_box_aug) + + while True: + # if epoch > opt.start_refine_epoch: + # opt.refine_pseudo_box = refine_pseudo_box_copy + # opt.pseudo_box_aug = pseudo_box_aug_copy + # criterion.refine_pseudo_box = refine_pseudo_box_copy + # criterion.pseudo_box_aug = pseudo_box_aug_copy + # model.opt = opt + # else: + # opt.refine_pseudo_box = False + # opt.pseudo_box_aug = False + # criterion.refine_pseudo_box = False + # criterion.pseudo_box_aug = False + # model.opt = opt + + if True: + # scheduled sampling rate update + if epoch > opt.scheduled_sampling_start >= 0: + frac = 
(epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every + opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac, + opt.scheduled_sampling_max_prob) + model.caption_head.ss_prob = opt.ss_prob + + print('lr:{}'.format(float(opt.current_lr))) + pass + + # breakpoint() + # Batch-level iteration + trained_samples = 0 + for dt in tqdm(train_loader, disable=opt.disable_tqdm): + # # for fast debugging + # if trained_samples > 5: + # break + # else: + # trained_samples += 1 + + if opt.device=='cuda': + torch.cuda.synchronize(opt.device) + if opt.debug: + # each epoch contains less mini-batches for debugging + if (iteration + 1) % 5 == 0: + iteration += 1 + break + iteration += 1 + + optimizer.zero_grad() + dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()} + dt['video_target'] = [ + {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in + dt['video_target']] + + # Add text encoder + # if opt.matcher_type == 'DTW' or opt.use_pseudo_box: + # captions = list() + # for video_sents in dt['cap_raw']: # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]] + # captions.extend(video_sents) + # text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len) + # text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, 
+            model.eval()
+            result_json_path = os.path.join(save_folder, 'prediction',
+                                            'num{}_epoch{}.json'.format(len(val_dataset), epoch))
+            # eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path,
+                                     logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            if opt.caption_decoder_type == 'none':
+                current_score = 2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])
+            else:
+                if opt.criteria_for_best_ckpt == 'dvc':
+                    current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+                elif opt.criteria_for_best_ckpt == 'overall':
+                    current_score = np.array(eval_score['Bleu_4']).mean() + \
+                                    np.array(eval_score['CIDEr']).mean() + \
+                                    np.array(eval_score['METEOR']).mean() + \
+                                    2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])
+                else:
+                    current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            # add to tf summary
+            for key in eval_score.keys():
+                tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration)
+
+            # Huabin commented this part to avoid reporting losses during evaluation
+            # for loss_type in eval_loss.keys():
+            #     tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration)
+
+            _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info)
+            logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score))
+            val_result_history[epoch] = {'eval_score': eval_score}
+            logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path))
+
+            # save the model parameters of the best epoch
+            if current_score >= best_val_score:
+                best_val_score = current_score
+                best_epoch = epoch
+                saved_info['best'] = {'opt': vars(opt),
+                                      'iter': iteration,
+                                      'epoch': best_epoch,
+                                      'best_val_score': best_val_score,
+                                      'result_json_path': result_json_path,
+                                      'avg_proposal_num': eval_score['avg_proposal_number'],
+                                      'Precision': eval_score['Precision'],
+                                      'Recall': eval_score['Recall']
+                                      }
+
+                # suffix = "RL" if sc_flag else "CE"
+                torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth'))
+                logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration))
+
+            saved_info['last'] = {'opt': vars(opt),
+                                  'iter': iteration,
+                                  'epoch': epoch,
+                                  'best_val_score': best_val_score,
+                                  }
+            saved_info['history'] = {'val_result_history': val_result_history,
+                                     'loss_history': loss_history,
+                                     'lr_history': lr_history,
+                                     # 'query_matched_fre_hist': query_matched_fre_hist,
+                                     }
+            with open(os.path.join(save_folder, 'info.json'), 'w') as f:
+                json.dump(saved_info, f)
+            logger.info('Save info to info.json')
+
+            model.train()
+
+        epoch += 1
+        lr_scheduler.step()
+        opt.current_lr = optimizer.param_groups[0]['lr']
+        torch.cuda.empty_cache()
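For reference, a small sketch of the checkpoint-selection scores computed above; F1 is the harmonic mean of localization precision and recall, and the 'overall' criterion stacks the captioning means on top of it:

import numpy as np

def f1(precision, recall):
    return 2. / (1. / precision + 1. / recall)

def overall_score(eval_score):
    # mirrors the 'overall' branch above
    return (np.mean(eval_score['Bleu_4']) + np.mean(eval_score['CIDEr'])
            + np.mean(eval_score['METEOR']) + f1(eval_score['Precision'], eval_score['Recall']))

# f1(0.45, 0.31) ≈ 0.367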
+        # Stop criterion
+        if epoch >= opt.epoch:
+            # # load Best model and conduct evaluation
+            # print('====== Conduct the Final Evaluation to test Best Checkpoint ======')
+            # val_logger = create_logger(save_folder, 'val.log')
+            # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda')
+            # model.load_state_dict(loaded_pth['model'], strict=True)
+            # model.eval()
+            # result_json_path = saved_info['best']['result_json_path']
+            # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            # if opt.caption_decoder_type == 'none':
+            #     current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall'])
+            # else:
+            #     if opt.criteria_for_best_ckpt == 'dvc':
+            #         current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+            #     else:
+            #         current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter']))
+            # val_logger.info('\nBest Model Performance:\n' + print_info)
+            # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score))
+
+            # tf_writer.close()
+            # break
+
+            val_logger = create_logger(save_folder, 'val.log')
+            infos_path = os.path.join(save_folder, 'info.json')
+
+            with open(infos_path, 'r') as f:
+                data = json.load(f)
+                val_history = data['history']['val_result_history']
+
+            metric_sum = {}
+            metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall']
+            for k, v in val_history.items():
+                metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics])
+                # print(f"{k}: {metric_sum[k]}")
+
+            best_epoch = max(metric_sum, key=metric_sum.get)
+            best_val_score = val_history[best_epoch]['eval_score']
+            val_logger.info(f"Best epoch: {best_epoch}")
+            print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()])
+            val_logger.info('\nBest Model Performance:\n' + print_info)
+            val_logger.info('\nBest Overall Score (epoch {}): {}\n'.format(best_epoch, metric_sum[best_epoch]))
+
+            break
+
+    return saved_info
+
+
+if __name__ == '__main__':
+    opt = opts.parse_opts()
+    opt.id = 'seq-pre_perc-{}'.format(opt.pre_percent)
+    assert 0.0 <= opt.pre_percent <= 1.0
+
+    if not hasattr(opt, 'visual_feature_folder_val'):
+        opt.visual_feature_folder_val = opt.visual_feature_folder
+        opt.text_feature_folder_val = opt.text_feature_folder
+    # breakpoint()
+    if opt.map:
+        opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder]
+        opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder]
+        opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macos
+    # breakpoint()
+    train(opt)
+
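The post-hoc selection in the stop branch can also be run standalone against a saved info.json; a minimal sketch (the path argument is illustrative):

import json

def best_epoch_from_info(info_path='info.json',
                         metrics=('METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall')):
    # re-rank the saved epochs by the same metric sum used at the end of training
    with open(info_path) as f:
        history = json.load(f)['history']['val_result_history']
    scores = {ep: sum(v['eval_score'][m] for m in metrics) for ep, v in history.items()}
    return max(scores, key=scores.get), scores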
diff --git a/yc2_univl/backup/train_seq.py b/yc2_univl/backup/train_seq.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a415e180bf2506f1cbef5ce6d0f6f4205e76203
--- /dev/null
+++ b/yc2_univl/backup/train_seq.py
@@ -0,0 +1,457 @@
+# coding:utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def _init_fn(worker_id):
+    np.random.seed(12 + worker_id)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
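map_path rewrites the A100 cluster prefixes to their 3090 mirrors and aborts on unknown prefixes; a quick sanity check against the tables above (the feature subdirectory is illustrative):

src = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features'
assert map_path(src) == '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features'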
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_floder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0],
+                                                [opt.visual_feature_folder[0]],
+                                                [opt.text_feature_folder[0]],
+                                                opt.dict_file, True, 'gt',
+                                                opt)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file[1],
+                                              [opt.visual_feature_folder[1]],
+                                              [opt.text_feature_folder[1]],
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size,
+                                           shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+        train_dataloaders = [train_loader_pretrain, train_loader_target]
+        # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        # train_dataset.translator = train_dataset_1.translator
+
+    else:
+        train_dataset_target = PropSeqDataset(opt.train_caption_file,
+                                              opt.visual_feature_folder,
+                                              opt.text_feature_folder,
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_dataloaders = [train_loader_target]
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0)
+    best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5)
+    val_result_history = saved_info['history'].get('val_result_history', {})
+    loss_history = saved_info['history'].get('loss_history', {})
+    lr_history = saved_info['history'].get('lr_history', {})
+    opt.current_lr = vars(opt).get('current_lr', opt.lr)
+
+    # Build model
+    model, criterion, contrastive_criterion, postprocessors = build(opt)
+    model.translator = train_dataset_target.translator
+    model.train()
+
+    # Recover the parameters
+    if opt.start_from and (not opt.pretrain):
+        if opt.start_from_mode == 'best':
+            model_pth = torch.load(os.path.join(save_folder, 'model-best.pth'))
+        elif opt.start_from_mode == 'last':
+            model_pth = torch.load(os.path.join(save_folder, 'model-last.pth'))
+        logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration))
+        model.load_state_dict(model_pth['model'])
+
+    # Load the pre-trained model
+    if opt.pretrain and (not opt.start_from):
+        logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path))
+        model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device))
+        # query_weight = model_pth['model'].pop('query_embed.weight')
+        if opt.pretrain == 'encoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            encoder_pth = {k: v for k, v in model_pth['model'].items() if encoder_filter(k)}
+            model.load_state_dict(encoder_pth, strict=True)
+        elif opt.pretrain == 'decoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            decoder_pth = {k: v for k, v in model_pth['model'].items() if not encoder_filter(k)}
+            model.load_state_dict(decoder_pth, strict=True)
+        elif opt.pretrain == 'full':
+            # model_pth = transfer(model, model_pth)
+            model.load_state_dict(model_pth['model'], strict=True)
+        else:
+            raise ValueError("wrong value of opt.pretrain")
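The 'encoder'/'decoder' branches above load only a filtered slice of the checkpoint; a generic sketch of that pattern (the `keep` predicate is a stand-in for model.get_filter_rule_for_encoder(), and strict=False is used here so the unfiltered remainder is tolerated):

import torch

def load_filtered(model, ckpt_path, keep=lambda name: name.startswith('transformer.encoder')):
    # keep only the checkpoint entries whose parameter names pass `keep`
    state = torch.load(ckpt_path, map_location='cpu')['model']
    subset = {k: v for k, v in state.items() if keep(k)}
    model.load_state_dict(subset, strict=False)
    return sorted(subset)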
+
+    model.to(opt.device)
+
+    # Decide which parameters need to be trained
+    # if (opt.matcher_type == 'DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen':
+    #     for _, p in model.text_encoder.named_parameters():
+    #         p.requires_grad = False
+    #     text_encoder_params = list(map(id, model.text_encoder.parameters()))
+    #     other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters())
+    # else:
+    #     other_params = model.parameters()
+    other_params = model.parameters()
+
+    training_params = [{'params': other_params, 'lr': opt.lr}]
+
+    if opt.optimizer_type == 'adam':
+        optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay)
+    elif opt.optimizer_type == 'adamw':
+        optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay)
+
+    milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in
+                 range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))]
+    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate)
+
+    # Load tokenizer for text encoder
+    # for i in range(10):
+    #     try:
+    #         if opt.pretrained_language_model == 'UniVL':
+    #             tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+    #         else:
+    #             tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model)
+    #         break
+    #     except:
+    #         print('download error in AutoTokenizer, retry...')
+    #         time.sleep(1)
+
+    if opt.start_from:
+        optimizer.load_state_dict(model_pth['optimizer'])
+        lr_scheduler.step(epoch - 1)
+
+    # print the args for debugging
+    print_opt(opt, model, logger)
+    print_alert_message('Start training !', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # Epoch-level iteration
+    while True:
+        if True:
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
+
+        # breakpoint()
+        # Batch-level iteration
+        for train_loader in train_dataloaders:
+            trained_samples = 0
+            for dt in tqdm(train_loader, disable=opt.disable_tqdm):
+                # # for fast debugging
+                # if trained_samples > 25:
+                #     break
+                # else:
+                #     trained_samples += 1
+                if opt.device == 'cuda':
+                    torch.cuda.synchronize(opt.device)
+                if opt.debug:
+                    # each epoch contains fewer mini-batches for debugging
+                    if (iteration + 1) % 5 == 0:
+                        iteration += 1
+                        break
+                iteration += 1
+
+                optimizer.zero_grad()
+                dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+                dt['video_target'] = [
+                    {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()}
+                    for vid_info in dt['video_target']]
+
+                # Add text encoder
+                # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+                #     captions = list()
+                #     for video_sents in dt['cap_raw']:  # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+                #         captions.extend(video_sents)
+                #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+                #     text_encoder_input = {key:
_.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()} + # # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])} + # # len(text_encoder_input['input_ids']) = n * max_text_input_len + # dt['text_encoder_input'] = text_encoder_input + + # dt = collections.defaultdict(lambda: None, dt) # Commented to + + output, loss = model(dt, criterion, contrastive_criterion) + final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict) + # breakpoint() + final_loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) + + optimizer.step() + + for loss_k,loss_v in loss.items(): + loss_sum[loss_k] = loss_sum.get(loss_k, 0)+ loss_v.item() + loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item() + + if opt.device=='cuda': + torch.cuda.synchronize() + + losses_log_every = int(len(train_loader) / 10) + + if opt.debug: + losses_log_every = 6 + + if iteration % losses_log_every == 0: + end = time.time() + for k in loss_sum.keys(): + loss_sum[k] = np.round(loss_sum[k] /losses_log_every, 3).item() + + logger.info( + "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}" + .format(opt.id, iteration, epoch, loss_sum, + (end - start) / losses_log_every, bad_video_num)) + + tf_writer.add_scalar('lr', opt.current_lr, iteration) + for loss_type in loss_sum.keys(): + tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration) + loss_history[iteration] = loss_sum + lr_history[iteration] = opt.current_lr + loss_sum = OrderedDict() + start = time.time() + bad_video_num = 0 + torch.cuda.empty_cache() + + # evaluation + if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save): + + # Save model + saved_pth = {'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + + if opt.save_all_checkpoint: + checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration)) + else: + checkpoint_path = os.path.join(save_folder, 'model-last.pth') + + torch.save(saved_pth, checkpoint_path) + + model.eval() + result_json_path = os.path.join(save_folder, 'prediction', + 'num{}_epoch{}.json'.format( + len(val_dataset), epoch)) + #eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + # add to tf summary + for key in eval_score.keys(): + tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration) + + # Huabin comment this part for avoiding reporting losses during evaluation + # for loss_type in eval_loss.keys(): + # tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration) + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + 
str(eval_score[key]) for key in eval_score.keys()]) + logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info) + logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score)) + val_result_history[epoch] = {'eval_score': eval_score} + logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path)) + + # save the model parameter and of best epoch + if current_score >= best_val_score: + best_val_score = current_score + best_epoch = epoch + saved_info['best'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': best_epoch, + 'best_val_score': best_val_score, + 'result_json_path': result_json_path, + 'avg_proposal_num': eval_score['avg_proposal_number'], + 'Precision': eval_score['Precision'], + 'Recall': eval_score['Recall'] + } + + # suffix = "RL" if sc_flag else "CE" + torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth')) + logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration)) + + saved_info['last'] = {'opt': vars(opt), + 'iter': iteration, + 'epoch': epoch, + 'best_val_score': best_val_score, + } + saved_info['history'] = {'val_result_history': val_result_history, + 'loss_history': loss_history, + 'lr_history': lr_history, + # 'query_matched_fre_hist': query_matched_fre_hist, + } + with open(os.path.join(save_folder, 'info.json'), 'w') as f: + json.dump(saved_info, f) + logger.info('Save info to info.json') + + model.train() + + epoch += 1 + lr_scheduler.step() + opt.current_lr = optimizer.param_groups[0]['lr'] + torch.cuda.empty_cache() + # Stop criterion + if epoch >= opt.epoch: + # load Best model and conduct evaluation + print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + val_logger = create_logger(save_folder, 'val.log') + loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + model.load_state_dict(loaded_pth['model'], strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-train' + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = 
[map_path(path) for path in opt.visual_feature_folder_val]
+        opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val]
+
+    if opt.gpu_id:
+        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id])
+    if opt.disable_cudnn:
+        torch.backends.cudnn.enabled = False
+
+    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # to avoid OMP problem on macos
+    # breakpoint()
+    train(opt)
+
diff --git a/yc2_univl/backup/train_seq_gt.py b/yc2_univl/backup/train_seq_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..235ae3a83169787f2b2db87e71f0fabe2dbc2dc1
--- /dev/null
+++ b/yc2_univl/backup/train_seq_gt.py
@@ -0,0 +1,480 @@
+# coding:utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import time
+import torch
+import os
+import sys
+import collections
+import numpy as np
+from tqdm import tqdm
+import torch.optim as optim
+from torch.utils.data import DataLoader
+from os.path import dirname, abspath
+
+pdvc_dir = dirname(abspath(__file__))
+sys.path.insert(0, pdvc_dir)
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
+sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
+# print(sys.path)
+
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warning of tokenizer
+from eval_utils import evaluate
+import opts
+from tensorboardX import SummaryWriter
+from misc.utils import print_alert_message, build_floder, create_logger, backup_envir, print_opt, set_seed
+from data.video_dataset import PropSeqDataset, collate_fn, PercentageSubsetDataset
+from pdvc.pdvc import build
+from collections import OrderedDict
+from transformers import AutoTokenizer, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
+import copy
+
+a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
+r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
+
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
+# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features
+
+def _init_fn(worker_id):
+    np.random.seed(12 + worker_id)
+
+def map_path(path):
+    path_backup = copy.deepcopy(path)
+    # breakpoint()
+    for i, folder in enumerate(a100_folder):
+        if folder in path:
+            path = path.replace(folder, r3090_folder[i])
+            return path
+    if path == path_backup:
+        print('map failed')
+        exit(1)
+
+
+def train(opt):
+    set_seed(opt.seed)
+    save_folder = build_floder(opt)
+    logger = create_logger(save_folder, 'train.log')
+    tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
+
+    if not opt.start_from:
+        backup_envir(save_folder)
+        logger.info('backup environment completed !')
+
+    saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
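The resume branch below overwrites the fresh options with the saved run's, keeping only the resume-control keys; a compact sketch of that merge, assuming plain dicts:

def merge_resume_opts(opt_dict, prev_opt, exclude=('start_from', 'start_from_mode', 'pretrain')):
    # overwrite current options with the saved run's and collect the diffs for logging
    changed = {}
    for k, v in prev_opt.items():
        if k not in exclude and opt_dict.get(k) != v:
            changed[k] = (opt_dict.get(k), v)
            opt_dict[k] = v
    return changed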
+
+    # continue training
+    if opt.start_from:
+        opt.pretrain = False
+        infos_path = os.path.join(save_folder, 'info.json')
+        with open(infos_path) as f:
+            logger.info('Load info from {}'.format(infos_path))
+            saved_info = json.load(f)
+            prev_opt = saved_info[opt.start_from_mode[:4]]['opt']
+
+            exclude_opt = ['start_from', 'start_from_mode', 'pretrain']
+            for opt_name in prev_opt.keys():
+                if opt_name not in exclude_opt:
+                    vars(opt).update({opt_name: prev_opt.get(opt_name)})
+                if prev_opt.get(opt_name) != vars(opt).get(opt_name):
+                    logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
+                                                                   vars(opt).get(opt_name)))
+    if len(opt.visual_feature_folder) == 2:
+        train_dataset_pretrain = PropSeqDataset(opt.train_caption_file[0],
+                                                [opt.visual_feature_folder[0]],
+                                                [opt.text_feature_folder[0]],
+                                                opt.dict_file, True, 'gt',
+                                                opt)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file[1],
+                                              [opt.visual_feature_folder[1]],
+                                              [opt.text_feature_folder[1]],
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        # Create the dataset with the specified percentage
+        subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
+
+        # # Create a DataLoader for the subset dataset
+        # subset_dataloader = DataLoader(subset_data, batch_size=64, shuffle=True)
+
+        train_loader_pretrain = DataLoader(train_dataset_pretrain, batch_size=opt.batch_size,
+                                           shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+        train_dataloaders = [train_loader_pretrain, train_loader_target]
+        # train_dataset = torch.utils.data.ConcatDataset([train_dataset_1, train_dataset_2])
+        # train_dataset.translator = train_dataset_1.translator
+
+    else:
+        print(f'this script only supports two-dataset training, but {len(opt.visual_feature_folder)} dataset folders were provided')
+        exit(1)
+        train_dataset_target = PropSeqDataset(opt.train_caption_file,
+                                              opt.visual_feature_folder,
+                                              opt.text_feature_folder,
+                                              opt.dict_file, True, 'gt',
+                                              opt)
+        train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size,
+                                         shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+        train_dataloaders = [train_loader_target]
+
+    # val_dataset = PropSeqDataset(opt.val_caption_file,
+    #                              opt.visual_feature_folder,
+    #                              opt.text_feature_folder,
+    #                              opt.dict_file, False, 'gt',
+    #                              opt)
+    if not hasattr(opt, 'dict_file_val'):
+        opt.dict_file_val = opt.dict_file
+        opt.vocab_size_val = opt.vocab_size
+
+    val_dataset = PropSeqDataset(opt.val_caption_file,
+                                 opt.visual_feature_folder_val,
+                                 opt.text_feature_folder_val,
+                                 opt.dict_file, False, 'gt',
+                                 opt)
+
+    val_loader = DataLoader(val_dataset, batch_size=opt.batch_size_for_eval,
+                            shuffle=False, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
+
+    epoch = saved_info[opt.start_from_mode[:4]].get('epoch', 0)
+    iteration = saved_info[opt.start_from_mode[:4]].get('iter', 0)
+    best_val_score = saved_info[opt.start_from_mode[:4]].get('best_val_score', -1e5)
+    val_result_history = saved_info['history'].get('val_result_history', {})
+    loss_history = saved_info['history'].get('loss_history', {})
+    lr_history = saved_info['history'].get('lr_history', {})
+    opt.current_lr = vars(opt).get('current_lr', opt.lr)
+
+    # Build model
+    model, criterion, contrastive_criterion, postprocessors = build(opt)
+    model.translator = train_dataset_target.translator
+    model.train()
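PercentageSubsetDataset keeps only opt.ft_gt_percent of the target annotations; a minimal stand-in built on torch.utils.data.Subset, assuming a deterministic prefix split (the real class may sample differently):

from torch.utils.data import Subset

def percentage_subset(dataset, percent):
    assert 0.0 <= percent <= 1.0
    keep = max(1, int(len(dataset) * percent))
    return Subset(dataset, range(keep))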
+
+    # Recover the parameters
+    if opt.start_from and (not opt.pretrain):
+        if opt.start_from_mode == 'best':
+            model_pth = torch.load(os.path.join(save_folder, 'model-best.pth'))
+        elif opt.start_from_mode == 'last':
+            model_pth = torch.load(os.path.join(save_folder, 'model-last.pth'))
+        logger.info('Loading pth from {}, iteration:{}'.format(save_folder, iteration))
+        model.load_state_dict(model_pth['model'])
+
+    # Load the pre-trained model
+    if opt.pretrain and (not opt.start_from):
+        logger.info('Load pre-trained parameters from {}'.format(opt.pretrain_path))
+        model_pth = torch.load(opt.pretrain_path, map_location=torch.device(opt.device))
+        # query_weight = model_pth['model'].pop('query_embed.weight')
+        if opt.pretrain == 'encoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            encoder_pth = {k: v for k, v in model_pth['model'].items() if encoder_filter(k)}
+            model.load_state_dict(encoder_pth, strict=True)
+        elif opt.pretrain == 'decoder':
+            encoder_filter = model.get_filter_rule_for_encoder()
+            decoder_pth = {k: v for k, v in model_pth['model'].items() if not encoder_filter(k)}
+            model.load_state_dict(decoder_pth, strict=True)
+        elif opt.pretrain == 'full':
+            # model_pth = transfer(model, model_pth)
+            model.load_state_dict(model_pth['model'], strict=True)
+        else:
+            raise ValueError("wrong value of opt.pretrain")
+
+    model.to(opt.device)
+
+    # Decide which parameters need to be trained
+    # if (opt.matcher_type == 'DTW' or opt.use_pseudo_box) and opt.text_encoder_learning_strategy == 'frozen':
+    #     for _, p in model.text_encoder.named_parameters():
+    #         p.requires_grad = False
+    #     text_encoder_params = list(map(id, model.text_encoder.parameters()))
+    #     other_params = filter(lambda p: id(p) not in text_encoder_params, model.parameters())
+    # else:
+    #     other_params = model.parameters()
+    other_params = model.parameters()
+
+    training_params = [{'params': other_params, 'lr': opt.lr}]
+
+    if opt.optimizer_type == 'adam':
+        optimizer = optim.Adam(training_params, weight_decay=opt.weight_decay)
+    elif opt.optimizer_type == 'adamw':
+        optimizer = optim.AdamW(training_params, weight_decay=opt.weight_decay)
+
+    milestone = [opt.learning_rate_decay_start + opt.learning_rate_decay_every * _ for _ in
+                 range(int((opt.epoch - opt.learning_rate_decay_start) / opt.learning_rate_decay_every))]
+    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestone, gamma=opt.learning_rate_decay_rate)
+
+    # Load tokenizer for text encoder
+    # for i in range(10):
+    #     try:
+    #         if opt.pretrained_language_model == 'UniVL':
+    #             tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+    #         else:
+    #             tokenizer = AutoTokenizer.from_pretrained(opt.pretrained_language_model)
+    #         break
+    #     except:
+    #         print('download error in AutoTokenizer, retry...')
+    #         time.sleep(1)
+
+    if opt.start_from:
+        optimizer.load_state_dict(model_pth['optimizer'])
+        lr_scheduler.step(epoch - 1)
+
+    # print the args for debugging
+    print_opt(opt, model, logger)
+    print_alert_message('Start training !', logger)
+
+    loss_sum = OrderedDict()
+    bad_video_num = 0
+
+    start = time.time()
+
+    weight_dict = criterion.weight_dict
+    logger.info('loss type: {}'.format(weight_dict.keys()))
+    logger.info('loss weights: {}'.format(weight_dict.values()))
+
+    # Epoch-level iteration
+    while True:
+        if True:
+            # scheduled sampling rate update
+            if epoch > opt.scheduled_sampling_start >= 0:
+                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
+                opt.ss_prob = min(opt.basic_ss_prob + opt.scheduled_sampling_increase_prob * frac,
+                                  opt.scheduled_sampling_max_prob)
+                model.caption_head.ss_prob = opt.ss_prob
+
+            print('lr:{}'.format(float(opt.current_lr)))
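The batch loop below alternates supervision between the two passes of each epoch: pseudo boxes for the HowTo100M loader, ground-truth boxes for the target loader. A minimal sketch of that switch, assuming the [pretrain, target] loader order built above:

def set_supervision_mode(opt, criterion, use_pseudo_box):
    # route the matcher to pseudo boxes (HowTo100M) or GT boxes (YouCook2/Tasty)
    opt.use_pseudo_box = use_pseudo_box
    criterion.opt = opt
    criterion.matcher.use_pseudo_box = use_pseudo_box

# for is_pretrain_pass, loader in zip((True, False), train_dataloaders):
#     set_supervision_mode(opt, criterion, is_pretrain_pass)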
+
+        # breakpoint()
+        # Batch-level iteration
+        opt.use_pseudo_box = False  # True for howto, False for yc2/tasty
+        opt.pseudo_box_aug = False
+        opt.refine_pseudo_box = False
+        # breakpoint()
+
+        for train_loader in train_dataloaders:
+            opt.use_pseudo_box = not opt.use_pseudo_box
+            criterion.opt = opt
+            criterion.matcher.use_pseudo_box = opt.use_pseudo_box
+
+            # if opt.use_pseudo_box:
+            #     print('howto dataset')
+            # else:
+            #     print('target dataset')
+            trained_samples = 0
+            for dt in tqdm(train_loader, disable=opt.disable_tqdm):
+                # # for fast debugging
+                # if trained_samples > 25:
+                #     break
+                # else:
+                #     trained_samples += 1
+                if opt.device == 'cuda':
+                    torch.cuda.synchronize(opt.device)
+                if opt.debug:
+                    # each epoch contains fewer mini-batches for debugging
+                    if (iteration + 1) % 5 == 0:
+                        iteration += 1
+                        break
+                iteration += 1
+
+                optimizer.zero_grad()
+                dt = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
+                dt['video_target'] = [
+                    {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()}
+                    for vid_info in dt['video_target']]
+
+                # Add text encoder
+                # if opt.matcher_type == 'DTW' or opt.use_pseudo_box:
+                #     captions = list()
+                #     for video_sents in dt['cap_raw']:  # dt['cap_raw']: [[sent_1, sent_2, ..., sent_n]]
+                #         captions.extend(video_sents)
+                #     text_encoder_input = tokenizer(captions, return_tensors='pt', truncation=True, padding=True, max_length=opt.max_text_input_len)
+                #     text_encoder_input = {key: _.to(opt.device) if isinstance(_, torch.Tensor) else _ for key, _ in text_encoder_input.items()}
+                #     # text_encoder_input: {'input_ids': tensor([[ 101, 1996, 2307, ..., 0, 0, 0],...]), 'attention_mask': tensor([[1, 1, 1, ..., 0, 0, 0],...])}
+                #     # len(text_encoder_input['input_ids']) = n * max_text_input_len
+                #     dt['text_encoder_input'] = text_encoder_input
+
+                # dt = collections.defaultdict(lambda: None, dt)  # Commented to
+
+                output, loss = model(dt, criterion, contrastive_criterion)
+                final_loss = sum(loss[k] * weight_dict[k] for k in loss.keys() if k in weight_dict)
+                # breakpoint()
+                final_loss.backward()
+                torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
+
+                optimizer.step()
+
+                for loss_k, loss_v in loss.items():
+                    loss_sum[loss_k] = loss_sum.get(loss_k, 0) + loss_v.item()
+                loss_sum['total_loss'] = loss_sum.get('total_loss', 0) + final_loss.item()
+
+                if opt.device == 'cuda':
+                    torch.cuda.synchronize()
+
+                losses_log_every = int(len(train_loader) / 10)
+
+                if opt.debug:
+                    losses_log_every = 6
+
+                if iteration % losses_log_every == 0:
+                    end = time.time()
+                    for k in loss_sum.keys():
+                        loss_sum[k] = np.round(loss_sum[k] / losses_log_every, 3).item()
+
+                    logger.info(
+                        "ID {} iter {} (epoch {}), \nloss = {}, \ntime/iter = {:.3f}, bad_vid = {:.3f}"
+                        .format(opt.id, iteration, epoch, loss_sum,
+                                (end - start) / losses_log_every, bad_video_num))
+
+                    tf_writer.add_scalar('lr', opt.current_lr, iteration)
+                    for loss_type in loss_sum.keys():
+                        tf_writer.add_scalar(loss_type, loss_sum[loss_type], iteration)
+                    loss_history[iteration] = loss_sum
+                    lr_history[iteration] = opt.current_lr
+                    loss_sum = OrderedDict()
+                    start = time.time()
+                    bad_video_num = 0
+                    torch.cuda.empty_cache()
+
+        # evaluation
+        if (epoch % opt.save_checkpoint_every == 0) and (epoch >= opt.min_epoch_when_save):
+
+            # Save model
+            saved_pth = {'epoch': epoch,
+                         'model': model.state_dict(),
+                         'optimizer': optimizer.state_dict()}
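final_loss above is the weight_dict-masked sum of the criterion's loss dict: entries without a configured weight are logged but not optimized. A tiny sketch of that reduction:

import torch

def weighted_total(loss, weight_dict):
    return sum(loss[k] * weight_dict[k] for k in loss if k in weight_dict)

# weighted_total({'loss_giou': torch.tensor(0.5), 'aux': torch.tensor(9.0)}, {'loss_giou': 4}) -> tensor(2.)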
+
+            if opt.save_all_checkpoint:
+                checkpoint_path = os.path.join(save_folder, 'model_iter_{}.pth'.format(iteration))
+            else:
+                checkpoint_path = os.path.join(save_folder, 'model-last.pth')
+
+            torch.save(saved_pth, checkpoint_path)
+
+            model.eval()
+            result_json_path = os.path.join(save_folder, 'prediction',
+                                            'num{}_epoch{}.json'.format(len(val_dataset), epoch))
+            # eval_score, eval_loss = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path,
+                                     logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug)
+            if opt.caption_decoder_type == 'none':
+                current_score = 2. / (1. / eval_score['Precision'] + 1. / eval_score['Recall'])
+            else:
+                if opt.criteria_for_best_ckpt == 'dvc':
+                    current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean()
+                else:
+                    current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean()
+
+            # add to tf summary
+            for key in eval_score.keys():
+                tf_writer.add_scalar(key, np.array(eval_score[key]).mean(), iteration)
+
+            # Huabin commented this part to avoid reporting losses during evaluation
+            # for loss_type in eval_loss.keys():
+            #     tf_writer.add_scalar('eval_' + loss_type, eval_loss[loss_type], iteration)
+
+            _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)]
+            print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()])
+            logger.info('\nValidation results of iter {}:\n'.format(iteration) + print_info)
+            logger.info('\noverall score of iter {}: {}\n'.format(iteration, current_score))
+            val_result_history[epoch] = {'eval_score': eval_score}
+            logger.info('Save model at iter {} to {}.'.format(iteration, checkpoint_path))
+
+            # save the model parameters of the best epoch
+            if current_score >= best_val_score:
+                best_val_score = current_score
+                best_epoch = epoch
+                saved_info['best'] = {'opt': vars(opt),
+                                      'iter': iteration,
+                                      'epoch': best_epoch,
+                                      'best_val_score': best_val_score,
+                                      'result_json_path': result_json_path,
+                                      'avg_proposal_num': eval_score['avg_proposal_number'],
+                                      'Precision': eval_score['Precision'],
+                                      'Recall': eval_score['Recall']
+                                      }
+
+                # suffix = "RL" if sc_flag else "CE"
+                torch.save(saved_pth, os.path.join(save_folder, 'model-best.pth'))
+                logger.info('Save Best-model at iter {} to checkpoint file.'.format(iteration))
+
+            saved_info['last'] = {'opt': vars(opt),
+                                  'iter': iteration,
+                                  'epoch': epoch,
+                                  'best_val_score': best_val_score,
+                                  }
+            saved_info['history'] = {'val_result_history': val_result_history,
+                                     'loss_history': loss_history,
+                                     'lr_history': lr_history,
+                                     # 'query_matched_fre_hist': query_matched_fre_hist,
+                                     }
+            with open(os.path.join(save_folder, 'info.json'), 'w') as f:
+                json.dump(saved_info, f)
+            logger.info('Save info to info.json')
+
+            model.train()
+
+        epoch += 1
+        lr_scheduler.step()
+        opt.current_lr = optimizer.param_groups[0]['lr']
+        torch.cuda.empty_cache()
+        # Stop criterion
+        if epoch >= opt.epoch:
+            # load Best model and conduct evaluation
+            print('====== Conduct the Final Evaluation to test Best Checkpoint ======')
+            val_logger = create_logger(save_folder, 'val.log')
+            loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda')
+            model.load_state_dict(loaded_pth['model'],
strict=True) + model.eval() + result_json_path = saved_info['best']['result_json_path'] + eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + if opt.caption_decoder_type == 'none': + current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + else: + if opt.criteria_for_best_ckpt == 'dvc': + current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + else: + current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + + _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + val_logger.info('\nBest Model Performance:\n' + print_info) + val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + tf_writer.close() + break + + return saved_info + + +if __name__ == '__main__': + opt = opts.parse_opts() + opt.id = 'seq-gt_percent_{}'.format(opt.ft_gt_percent) + assert opt.ft_gt_percent <= 1.0 and opt.ft_gt_percent >= 0.0 + + if not hasattr(opt, 'visual_feature_folder_val'): + opt.visual_feature_folder_val = opt.visual_feature_folder + opt.text_feature_folder_val = opt.text_feature_folder + + if opt.map: + opt.visual_feature_folder = [map_path(path) for path in opt.visual_feature_folder] + opt.text_feature_folder = [map_path(path) for path in opt.text_feature_folder] + opt.visual_feature_folder_val = [map_path(path) for path in opt.visual_feature_folder_val] + opt.text_feature_folder_val = [map_path(path) for path in opt.text_feature_folder_val] + + if opt.gpu_id: + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in opt.gpu_id]) + if opt.disable_cudnn: + torch.backends.cudnn.enabled = False + + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # to avoid OMP problem on macos + # breakpoint() + train(opt) + diff --git a/yc2_univl/info.json b/yc2_univl/info.json new file mode 100644 index 0000000000000000000000000000000000000000..b0ef7913dc5040eadde57551f13d9f8da312bcff --- /dev/null +++ b/yc2_univl/info.json @@ -0,0 +1 @@ +{"best": {"opt": {"cfg_path": "cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/yc2/captiondata/yc2_train.json"], "invalid_video_json": [], "val_caption_file": "data/yc2/captiondata/yc2_val.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/yc2/captiondata/yc2_val.json"], "gt_file_for_para_eval": ["data/yc2/captiondata/para/para_yc2_val.json"], "dict_file": "data/howto/vocabulary_howto_rate2_yc2.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["UniVL"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": 
null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 14538, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 200, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "UniVL", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 25, "window_size": 3, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, 
"ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 20, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "test": false, "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_yc2.json", "vocab_size_val": 14538, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 15996, "epoch": 11, "best_val_score": 0.5868440997381064, "result_json_path": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/prediction/num457_epoch11.json", "avg_proposal_num": -1, "Precision": 0.4513424333993264, "Recall": 0.30795469953703025}, "last": {"opt": {"cfg_path": "cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/yc2/captiondata/yc2_train.json"], "invalid_video_json": [], "val_caption_file": "data/yc2/captiondata/yc2_val.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/yc2/captiondata/yc2_val.json"], "gt_file_for_para_eval": ["data/yc2/captiondata/para/para_yc2_val.json"], "dict_file": "data/howto/vocabulary_howto_rate2_yc2.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["UniVL"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 14538, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 200, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, 
"prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "UniVL", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 25, "window_size": 3, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 20, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "test": false, "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_yc2.json", "vocab_size_val": 14538, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 26660, "epoch": 19, "best_val_score": 0.5868440997381064}, "history": 
{"val_result_history": {"0": {"eval_score": {"Bleu_1": 0.16894357888730638, "Bleu_2": 0.09902176620134434, "Bleu_3": 0.05312286436412136, "Bleu_4": 0.026212861867102137, "METEOR": 0.0791142699299577, "ROUGE_L": 0.15563765109454591, "CIDEr": 0.4087091055845523, "Recall": 0.1991554685892762, "Precision": 0.40083793546594454, "soda_c": 0.05642652494419026, "para_Bleu_1": 0.28013834967939705, "para_Bleu_2": 0.16393959632782257, "para_Bleu_3": 0.09809744775628881, "para_Bleu_4": 0.060378126412557326, "para_METEOR": 0.1286956339033507, "para_ROUGE_L": 0.29903071052996405, "para_CIDEr": 0.14675303603221324, "avg_proposal_number": -1}}, "1": {"eval_score": {"Bleu_1": 0.18247710374533507, "Bleu_2": 0.10433126216854799, "Bleu_3": 0.05471515540980739, "Bleu_4": 0.025315544998990337, "METEOR": 0.08392673175891194, "ROUGE_L": 0.16810710582244187, "CIDEr": 0.48711946137609907, "Recall": 0.23104975652842194, "Precision": 0.4442690424090867, "soda_c": 0.06454827356060923, "para_Bleu_1": 0.27953804293947354, "para_Bleu_2": 0.1635778619591909, "para_Bleu_3": 0.09761782578266559, "para_Bleu_4": 0.060085255296605154, "para_METEOR": 0.13134445752685775, "para_ROUGE_L": 0.3040652157082556, "para_CIDEr": 0.15701615141849948, "avg_proposal_number": -1}}, "2": {"eval_score": {"Bleu_1": 0.18812761655735627, "Bleu_2": 0.11394688266117041, "Bleu_3": 0.06350983100569632, "Bleu_4": 0.03295035253718016, "METEOR": 0.08673497362280043, "ROUGE_L": 0.17099683701262633, "CIDEr": 0.534654554166069, "Recall": 0.2545535313519452, "Precision": 0.4357073390990242, "soda_c": 0.06940030844072555, "para_Bleu_1": 0.31911536052560924, "para_Bleu_2": 0.19074275606485158, "para_Bleu_3": 0.11503629156908896, "para_Bleu_4": 0.07096292455051724, "para_METEOR": 0.14141970569772275, "para_ROUGE_L": 0.3133292457236414, "para_CIDEr": 0.18756071216976763, "avg_proposal_number": -1}}, "3": {"eval_score": {"Bleu_1": 0.19536023703614988, "Bleu_2": 0.11676341716851109, "Bleu_3": 0.06337153157323498, "Bleu_4": 0.031788948303475714, "METEOR": 0.09287502887069582, "ROUGE_L": 0.18168372139225142, "CIDEr": 0.5345089450528974, "Recall": 0.26186565000159123, "Precision": 0.4578470702650138, "soda_c": 0.06891495599002981, "para_Bleu_1": 0.3645537642333956, "para_Bleu_2": 0.21504928179111618, "para_Bleu_3": 0.1297486406737134, "para_Bleu_4": 0.08010111193897063, "para_METEOR": 0.1518569517959942, "para_ROUGE_L": 0.3241825281759821, "para_CIDEr": 0.22211083978975357, "avg_proposal_number": -1}}, "4": {"eval_score": {"Bleu_1": 0.19366491706119263, "Bleu_2": 0.1161802397372496, "Bleu_3": 0.06381908710297783, "Bleu_4": 0.0310996008751752, "METEOR": 0.0900086447067842, "ROUGE_L": 0.1772625018945245, "CIDEr": 0.5329339889166991, "Recall": 0.27822837264850414, "Precision": 0.4414053002674447, "soda_c": 0.0725148309247326, "para_Bleu_1": 0.36779729697992286, "para_Bleu_2": 0.2189609464261768, "para_Bleu_3": 0.13170237886801614, "para_Bleu_4": 0.08102932652379062, "para_METEOR": 0.15287168689015676, "para_ROUGE_L": 0.32609559286330886, "para_CIDEr": 0.24981796796266917, "avg_proposal_number": -1}}, "5": {"eval_score": {"Bleu_1": 0.19874944106127662, "Bleu_2": 0.12266046915797622, "Bleu_3": 0.07150852984916518, "Bleu_4": 0.036185181004552064, "METEOR": 0.09274687098087099, "ROUGE_L": 0.18413336093424784, "CIDEr": 0.5727051685734265, "Recall": 0.259037909270404, "Precision": 0.451289465457956, "soda_c": 0.07263494732248185, "para_Bleu_1": 0.32307562783294125, "para_Bleu_2": 0.1944214796418441, "para_Bleu_3": 0.11901149393254483, "para_Bleu_4": 0.07454555120453704, 
"para_METEOR": 0.14324209261218024, "para_ROUGE_L": 0.31918573126228, "para_CIDEr": 0.23096832321460165}}, "6": {"eval_score": {"Bleu_1": 0.2003309018825777, "Bleu_2": 0.1225756065112458, "Bleu_3": 0.06724461390362559, "Bleu_4": 0.033684328156599955, "METEOR": 0.0938288297360794, "ROUGE_L": 0.1832565856913202, "CIDEr": 0.5805494889367487, "Recall": 0.28578288505804933, "Precision": 0.4570872842207636, "soda_c": 0.07457933387713374, "para_Bleu_1": 0.3713316702717572, "para_Bleu_2": 0.22391267992808692, "para_Bleu_3": 0.1360620228892395, "para_Bleu_4": 0.08475146307949002, "para_METEOR": 0.15553928732702577, "para_ROUGE_L": 0.3279787647771023, "para_CIDEr": 0.24807495620487915, "avg_proposal_number": -1}}, "7": {"eval_score": {"Bleu_1": 0.19584871429233122, "Bleu_2": 0.1203954133477019, "Bleu_3": 0.06765236989260215, "Bleu_4": 0.03515047236439923, "METEOR": 0.09347581038898298, "ROUGE_L": 0.18336361365161372, "CIDEr": 0.5642570328531701, "Recall": 0.287053410514844, "Precision": 0.4506790316418327, "soda_c": 0.07315525040409161, "para_Bleu_1": 0.39595219023577966, "para_Bleu_2": 0.23717913606151478, "para_Bleu_3": 0.14480681642134902, "para_Bleu_4": 0.0901695364250172, "para_METEOR": 0.16127903027678414, "para_ROUGE_L": 0.3324403291093838, "para_CIDEr": 0.23804687234043756, "avg_proposal_number": -1}}, "8": {"eval_score": {"Bleu_1": 0.19696025394358163, "Bleu_2": 0.12042554867022627, "Bleu_3": 0.06805715701089529, "Bleu_4": 0.034063345644385214, "METEOR": 0.09208296372249718, "ROUGE_L": 0.1803782633150628, "CIDEr": 0.5812603125344058, "Recall": 0.29169024735901117, "Precision": 0.44299129936438486, "soda_c": 0.07606608300691252, "para_Bleu_1": 0.383549187276652, "para_Bleu_2": 0.23192713278728125, "para_Bleu_3": 0.14217181061136971, "para_Bleu_4": 0.0892715976218228, "para_METEOR": 0.16074434603101373, "para_ROUGE_L": 0.3336567463040183, "para_CIDEr": 0.2859809872200661, "avg_proposal_number": -1}}, "9": {"eval_score": {"Bleu_1": 0.20446290018298774, "Bleu_2": 0.12418412895577716, "Bleu_3": 0.06899010124646034, "Bleu_4": 0.03428116460131532, "METEOR": 0.09595521703655657, "ROUGE_L": 0.1876517650928566, "CIDEr": 0.5887832993219201, "Recall": 0.3017153873964599, "Precision": 0.4588439095550697, "soda_c": 0.07875391677883807, "para_Bleu_1": 0.3953706124668704, "para_Bleu_2": 0.24043007714841402, "para_Bleu_3": 0.14833197751929023, "para_Bleu_4": 0.09386644902900565, "para_METEOR": 0.16476396966168239, "para_ROUGE_L": 0.33760319454244797, "para_CIDEr": 0.31194480042956774, "avg_proposal_number": -1}}, "10": {"eval_score": {"Bleu_1": 0.19267153393038786, "Bleu_2": 0.11732781330402656, "Bleu_3": 0.06746115616325608, "Bleu_4": 0.03425583839334337, "METEOR": 0.08963300348041837, "ROUGE_L": 0.17480207136309905, "CIDEr": 0.575137603362526, "Recall": 0.30432682743951917, "Precision": 0.4353044354138446, "soda_c": 0.07762847290423684, "para_Bleu_1": 0.393384019586376, "para_Bleu_2": 0.23835405770332685, "para_Bleu_3": 0.14545808678454117, "para_Bleu_4": 0.09085202435904723, "para_METEOR": 0.16354570345255123, "para_ROUGE_L": 0.3343729651839732, "para_CIDEr": 0.27098453497923136}}, "11": {"eval_score": {"Bleu_1": 0.1989422607268001, "Bleu_2": 0.12223038556953512, "Bleu_3": 0.06835990671747892, "Bleu_4": 0.03486159828438583, "METEOR": 0.09408978838449876, "ROUGE_L": 0.18200142867223945, "CIDEr": 0.593480700759431, "Recall": 0.30795469953703025, "Precision": 0.4513424333993264, "soda_c": 0.0796861065455984, "para_Bleu_1": 0.39594509057043764, "para_Bleu_2": 0.24087109399513515, "para_Bleu_3": 
0.14790262814870953, "para_Bleu_4": 0.09321042711819619, "para_METEOR": 0.1655617051143519, "para_ROUGE_L": 0.3391051008488012, "para_CIDEr": 0.32807196750555834, "avg_proposal_number": -1}}, "12": {"eval_score": {"Bleu_1": 0.19294534256446427, "Bleu_2": 0.11789730285267924, "Bleu_3": 0.06601509377472357, "Bleu_4": 0.03274421971508606, "METEOR": 0.0906445074413136, "ROUGE_L": 0.17678145420382357, "CIDEr": 0.5750907875125135, "Recall": 0.3073352674556176, "Precision": 0.4434536834427428, "soda_c": 0.07896521325127955, "para_Bleu_1": 0.39483511792471604, "para_Bleu_2": 0.23988438429479647, "para_Bleu_3": 0.1464330354033768, "para_Bleu_4": 0.09122283851671699, "para_METEOR": 0.16480200992253577, "para_ROUGE_L": 0.33317486176302236, "para_CIDEr": 0.29080350784714515}}, "13": {"eval_score": {"Bleu_1": 0.1916652028982354, "Bleu_2": 0.11864819375256218, "Bleu_3": 0.06801290454817709, "Bleu_4": 0.03421778123301331, "METEOR": 0.08890100804282676, "ROUGE_L": 0.17229926562968575, "CIDEr": 0.5719694906113042, "Recall": 0.3115151404333572, "Precision": 0.42734448265082836, "soda_c": 0.07979305036983636, "para_Bleu_1": 0.3972508455506424, "para_Bleu_2": 0.24317507500304622, "para_Bleu_3": 0.1497047997976745, "para_Bleu_4": 0.09437727320664267, "para_METEOR": 0.16651343432042678, "para_ROUGE_L": 0.33875534436877147, "para_CIDEr": 0.29220356232363026}}, "14": {"eval_score": {"Bleu_1": 0.19012877786294885, "Bleu_2": 0.11743680046097797, "Bleu_3": 0.06623934110461578, "Bleu_4": 0.03314975306654321, "METEOR": 0.08857227272587216, "ROUGE_L": 0.17208518718096077, "CIDEr": 0.5689998070546577, "Recall": 0.3090681299310951, "Precision": 0.43095498593310433, "soda_c": 0.08081534748318767, "para_Bleu_1": 0.3949292262433903, "para_Bleu_2": 0.24183495416706074, "para_Bleu_3": 0.1493168425692173, "para_Bleu_4": 0.0941904023418332, "para_METEOR": 0.16661877157717606, "para_ROUGE_L": 0.3391544295873436, "para_CIDEr": 0.3057631644012313}}, "15": {"eval_score": {"Bleu_1": 0.1927355202990476, "Bleu_2": 0.11755729236198051, "Bleu_3": 0.06532950485231373, "Bleu_4": 0.0318670348131602, "METEOR": 0.08966953019840175, "ROUGE_L": 0.17549405824640266, "CIDEr": 0.5708533801009449, "Recall": 0.31055728552993345, "Precision": 0.4412863394810881, "soda_c": 0.08079399116249976, "para_Bleu_1": 0.3847850395827542, "para_Bleu_2": 0.23591168028694995, "para_Bleu_3": 0.14500000021146267, "para_Bleu_4": 0.09097906463153684, "para_METEOR": 0.1633729521776342, "para_ROUGE_L": 0.33764324525807, "para_CIDEr": 0.3225522700715415}}, "16": {"eval_score": {"Bleu_1": 0.1905629005997804, "Bleu_2": 0.11689699082903934, "Bleu_3": 0.06544029555928756, "Bleu_4": 0.03330988693345351, "METEOR": 0.08938496175202132, "ROUGE_L": 0.17298359351524648, "CIDEr": 0.5732307929342625, "Recall": 0.309604513071417, "Precision": 0.43046524955715343, "soda_c": 0.08056479007503722, "para_Bleu_1": 0.3975304274857351, "para_Bleu_2": 0.24253918136446623, "para_Bleu_3": 0.14848895422464012, "para_Bleu_4": 0.09337330751749118, "para_METEOR": 0.16677196164785574, "para_ROUGE_L": 0.33750187221117683, "para_CIDEr": 0.31278894258081524}}, "17": {"eval_score": {"Bleu_1": 0.19099469488969467, "Bleu_2": 0.11646897839764006, "Bleu_3": 0.06451308365995856, "Bleu_4": 0.032200079484133, "METEOR": 0.08912416771202449, "ROUGE_L": 0.1730757893125124, "CIDEr": 0.5693051160396969, "Recall": 0.3097042977992106, "Precision": 0.43274547601681085, "soda_c": 0.08084297498321232, "para_Bleu_1": 0.3924031546442418, "para_Bleu_2": 0.23911474626028398, "para_Bleu_3": 0.14600811918196227, 
"para_Bleu_4": 0.09107950853175292, "para_METEOR": 0.16594454181978452, "para_ROUGE_L": 0.33729101832099057, "para_CIDEr": 0.30892642009784}}, "18": {"eval_score": {"Bleu_1": 0.19191750615066444, "Bleu_2": 0.11783589874301872, "Bleu_3": 0.06597231596326529, "Bleu_4": 0.03167603834812624, "METEOR": 0.08996609888818348, "ROUGE_L": 0.1746391859525846, "CIDEr": 0.5689023016363987, "Recall": 0.31503357525649683, "Precision": 0.4376628112951966, "soda_c": 0.08097707611185051, "para_Bleu_1": 0.3977375551078834, "para_Bleu_2": 0.24323062675170298, "para_Bleu_3": 0.1488548587270082, "para_Bleu_4": 0.09292110149283073, "para_METEOR": 0.16716298804356167, "para_ROUGE_L": 0.33781551083855066, "para_CIDEr": 0.31014493696748857}}, "19": {"eval_score": {"Bleu_1": 0.1908811984292725, "Bleu_2": 0.11664270449592412, "Bleu_3": 0.06546844271584715, "Bleu_4": 0.03266470081303028, "METEOR": 0.08981101020496235, "ROUGE_L": 0.17382953846907112, "CIDEr": 0.5716745559959934, "Recall": 0.31292035599338697, "Precision": 0.4345220728699943, "soda_c": 0.08127095018359767, "para_Bleu_1": 0.40170065588267356, "para_Bleu_2": 0.2447870245859959, "para_Bleu_3": 0.14990588787772124, "para_Bleu_4": 0.09419227635900729, "para_METEOR": 0.16780671784283924, "para_ROUGE_L": 0.33845945539662686, "para_CIDEr": 0.3198675630646056}}}, "loss_history": {"133": {"loss_ce": 0.336, "loss_counter": 0.129, "loss_bbox": 0.039, "loss_giou": 0.368, "loss_self_iou": 0.028, "cardinality_error": 7.797, "loss_ce_0": 0.337, "loss_counter_0": 0.13, "loss_bbox_0": 0.041, "loss_giou_0": 0.381, "loss_self_iou_0": 0.03, "cardinality_error_0": 7.797, "loss_caption_0": 2.755, "loss_caption": 2.681, "total_loss": 15.341}, "266": {"loss_ce": 0.324, "loss_counter": 0.129, "loss_bbox": 0.036, "loss_giou": 0.369, "loss_self_iou": 0.018, "cardinality_error": 7.812, "loss_ce_0": 0.341, "loss_counter_0": 0.132, "loss_bbox_0": 0.039, "loss_giou_0": 0.38, "loss_self_iou_0": 0.019, "cardinality_error_0": 7.812, "loss_caption_0": 2.803, "loss_caption": 2.638, "total_loss": 15.341}, "399": {"loss_ce": 0.312, "loss_counter": 0.13, "loss_bbox": 0.039, "loss_giou": 0.375, "loss_self_iou": 0.02, "cardinality_error": 7.835, "loss_ce_0": 0.324, "loss_counter_0": 0.132, "loss_bbox_0": 0.043, "loss_giou_0": 0.395, "loss_self_iou_0": 0.021, "cardinality_error_0": 7.835, "loss_caption_0": 2.81, "loss_caption": 2.676, "total_loss": 15.459}, "532": {"loss_ce": 0.307, "loss_counter": 0.133, "loss_bbox": 0.044, "loss_giou": 0.394, "loss_self_iou": 0.02, "cardinality_error": 7.902, "loss_ce_0": 0.319, "loss_counter_0": 0.133, "loss_bbox_0": 0.05, "loss_giou_0": 0.421, "loss_self_iou_0": 0.026, "cardinality_error_0": 7.902, "loss_caption_0": 2.817, "loss_caption": 2.654, "total_loss": 15.588}, "665": {"loss_ce": 0.312, "loss_counter": 0.135, "loss_bbox": 0.034, "loss_giou": 0.345, "loss_self_iou": 0.017, "cardinality_error": 7.805, "loss_ce_0": 0.319, "loss_counter_0": 0.131, "loss_bbox_0": 0.038, "loss_giou_0": 0.372, "loss_self_iou_0": 0.019, "cardinality_error_0": 7.805, "loss_caption_0": 2.758, "loss_caption": 2.635, "total_loss": 15.049}, "798": {"loss_ce": 0.321, "loss_counter": 0.125, "loss_bbox": 0.03, "loss_giou": 0.319, "loss_self_iou": 0.015, "cardinality_error": 7.774, "loss_ce_0": 0.331, "loss_counter_0": 0.124, "loss_bbox_0": 0.032, "loss_giou_0": 0.344, "loss_self_iou_0": 0.015, "cardinality_error_0": 7.774, "loss_caption_0": 2.66, "loss_caption": 2.559, "total_loss": 14.519}, "931": {"loss_ce": 0.327, "loss_counter": 0.122, "loss_bbox": 0.027, "loss_giou": 0.306, 
"loss_self_iou": 0.011, "cardinality_error": 7.865, "loss_ce_0": 0.346, "loss_counter_0": 0.123, "loss_bbox_0": 0.029, "loss_giou_0": 0.327, "loss_self_iou_0": 0.012, "cardinality_error_0": 7.865, "loss_caption_0": 2.54, "loss_caption": 2.468, "total_loss": 14.017}, "1064": {"loss_ce": 0.331, "loss_counter": 0.121, "loss_bbox": 0.027, "loss_giou": 0.292, "loss_self_iou": 0.01, "cardinality_error": 7.579, "loss_ce_0": 0.345, "loss_counter_0": 0.127, "loss_bbox_0": 0.028, "loss_giou_0": 0.311, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.579, "loss_caption_0": 2.639, "loss_caption": 2.626, "total_loss": 14.419}, "1197": {"loss_ce": 0.325, "loss_counter": 0.118, "loss_bbox": 0.026, "loss_giou": 0.296, "loss_self_iou": 0.011, "cardinality_error": 7.241, "loss_ce_0": 0.339, "loss_counter_0": 0.121, "loss_bbox_0": 0.028, "loss_giou_0": 0.317, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.241, "loss_caption_0": 2.501, "loss_caption": 2.496, "total_loss": 13.892}, "1330": {"loss_ce": 0.327, "loss_counter": 0.126, "loss_bbox": 0.026, "loss_giou": 0.304, "loss_self_iou": 0.011, "cardinality_error": 7.94, "loss_ce_0": 0.334, "loss_counter_0": 0.127, "loss_bbox_0": 0.029, "loss_giou_0": 0.332, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.94, "loss_caption_0": 2.635, "loss_caption": 2.619, "total_loss": 14.504}, "1463": {"loss_ce": 0.322, "loss_counter": 0.128, "loss_bbox": 0.026, "loss_giou": 0.301, "loss_self_iou": 0.011, "cardinality_error": 7.699, "loss_ce_0": 0.335, "loss_counter_0": 0.129, "loss_bbox_0": 0.026, "loss_giou_0": 0.316, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.699, "loss_caption_0": 2.448, "loss_caption": 2.462, "total_loss": 13.729}, "1596": {"loss_ce": 0.311, "loss_counter": 0.126, "loss_bbox": 0.022, "loss_giou": 0.284, "loss_self_iou": 0.01, "cardinality_error": 8.233, "loss_ce_0": 0.322, "loss_counter_0": 0.123, "loss_bbox_0": 0.024, "loss_giou_0": 0.31, "loss_self_iou_0": 0.01, "cardinality_error_0": 8.233, "loss_caption_0": 2.348, "loss_caption": 2.348, "total_loss": 13.16}, "1729": {"loss_ce": 0.311, "loss_counter": 0.124, "loss_bbox": 0.023, "loss_giou": 0.273, "loss_self_iou": 0.01, "cardinality_error": 7.632, "loss_ce_0": 0.32, "loss_counter_0": 0.124, "loss_bbox_0": 0.026, "loss_giou_0": 0.307, "loss_self_iou_0": 0.012, "cardinality_error_0": 7.632, "loss_caption_0": 2.363, "loss_caption": 2.353, "total_loss": 13.14}, "1862": {"loss_ce": 0.316, "loss_counter": 0.12, "loss_bbox": 0.023, "loss_giou": 0.268, "loss_self_iou": 0.01, "cardinality_error": 7.609, "loss_ce_0": 0.32, "loss_counter_0": 0.119, "loss_bbox_0": 0.025, "loss_giou_0": 0.29, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.609, "loss_caption_0": 2.439, "loss_caption": 2.419, "total_loss": 13.343}, "1995": {"loss_ce": 0.314, "loss_counter": 0.122, "loss_bbox": 0.022, "loss_giou": 0.281, "loss_self_iou": 0.009, "cardinality_error": 7.541, "loss_ce_0": 0.322, "loss_counter_0": 0.122, "loss_bbox_0": 0.025, "loss_giou_0": 0.309, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.541, "loss_caption_0": 2.503, "loss_caption": 2.503, "total_loss": 13.766}, "2128": {"loss_ce": 0.316, "loss_counter": 0.126, "loss_bbox": 0.024, "loss_giou": 0.284, "loss_self_iou": 0.009, "cardinality_error": 7.789, "loss_ce_0": 0.324, "loss_counter_0": 0.125, "loss_bbox_0": 0.026, "loss_giou_0": 0.301, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.789, "loss_caption_0": 2.5, "loss_caption": 2.493, "total_loss": 13.73}, "2261": {"loss_ce": 0.31, "loss_counter": 0.122, "loss_bbox": 0.023, "loss_giou": 
0.285, "loss_self_iou": 0.012, "cardinality_error": 7.902, "loss_ce_0": 0.316, "loss_counter_0": 0.12, "loss_bbox_0": 0.025, "loss_giou_0": 0.304, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.902, "loss_caption_0": 2.425, "loss_caption": 2.424, "total_loss": 13.426}, "2394": {"loss_ce": 0.315, "loss_counter": 0.126, "loss_bbox": 0.025, "loss_giou": 0.29, "loss_self_iou": 0.011, "cardinality_error": 7.534, "loss_ce_0": 0.323, "loss_counter_0": 0.125, "loss_bbox_0": 0.026, "loss_giou_0": 0.308, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.534, "loss_caption_0": 2.439, "loss_caption": 2.435, "total_loss": 13.54}, "2527": {"loss_ce": 0.313, "loss_counter": 0.125, "loss_bbox": 0.023, "loss_giou": 0.276, "loss_self_iou": 0.009, "cardinality_error": 7.647, "loss_ce_0": 0.319, "loss_counter_0": 0.123, "loss_bbox_0": 0.025, "loss_giou_0": 0.296, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.647, "loss_caption_0": 2.454, "loss_caption": 2.455, "total_loss": 13.492}, "2660": {"loss_ce": 0.313, "loss_counter": 0.131, "loss_bbox": 0.023, "loss_giou": 0.273, "loss_self_iou": 0.01, "cardinality_error": 8.0, "loss_ce_0": 0.317, "loss_counter_0": 0.128, "loss_bbox_0": 0.026, "loss_giou_0": 0.294, "loss_self_iou_0": 0.01, "cardinality_error_0": 8.0, "loss_caption_0": 2.464, "loss_caption": 2.451, "total_loss": 13.487}, "2793": {"loss_ce": 0.309, "loss_counter": 0.119, "loss_bbox": 0.021, "loss_giou": 0.26, "loss_self_iou": 0.01, "cardinality_error": 7.556, "loss_ce_0": 0.312, "loss_counter_0": 0.118, "loss_bbox_0": 0.024, "loss_giou_0": 0.285, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.556, "loss_caption_0": 2.27, "loss_caption": 2.276, "total_loss": 12.632}, "2926": {"loss_ce": 0.313, "loss_counter": 0.121, "loss_bbox": 0.023, "loss_giou": 0.266, "loss_self_iou": 0.008, "cardinality_error": 7.444, "loss_ce_0": 0.317, "loss_counter_0": 0.118, "loss_bbox_0": 0.025, "loss_giou_0": 0.287, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.444, "loss_caption_0": 2.276, "loss_caption": 2.291, "total_loss": 12.726}, "3059": {"loss_ce": 0.298, "loss_counter": 0.127, "loss_bbox": 0.02, "loss_giou": 0.272, "loss_self_iou": 0.008, "cardinality_error": 8.135, "loss_ce_0": 0.302, "loss_counter_0": 0.125, "loss_bbox_0": 0.023, "loss_giou_0": 0.296, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.135, "loss_caption_0": 2.364, "loss_caption": 2.364, "total_loss": 13.057}, "3192": {"loss_ce": 0.301, "loss_counter": 0.122, "loss_bbox": 0.022, "loss_giou": 0.266, "loss_self_iou": 0.008, "cardinality_error": 7.699, "loss_ce_0": 0.306, "loss_counter_0": 0.121, "loss_bbox_0": 0.023, "loss_giou_0": 0.286, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.699, "loss_caption_0": 2.367, "loss_caption": 2.381, "total_loss": 13.038}, "3325": {"loss_ce": 0.3, "loss_counter": 0.123, "loss_bbox": 0.021, "loss_giou": 0.274, "loss_self_iou": 0.009, "cardinality_error": 7.932, "loss_ce_0": 0.3, "loss_counter_0": 0.121, "loss_bbox_0": 0.023, "loss_giou_0": 0.291, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.932, "loss_caption_0": 2.323, "loss_caption": 2.33, "total_loss": 12.887}, "3458": {"loss_ce": 0.31, "loss_counter": 0.124, "loss_bbox": 0.021, "loss_giou": 0.277, "loss_self_iou": 0.01, "cardinality_error": 7.865, "loss_ce_0": 0.31, "loss_counter_0": 0.123, "loss_bbox_0": 0.023, "loss_giou_0": 0.295, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.865, "loss_caption_0": 2.351, "loss_caption": 2.341, "total_loss": 13.038}, "3591": {"loss_ce": 0.306, "loss_counter": 0.114, "loss_bbox": 0.022, 
"loss_giou": 0.263, "loss_self_iou": 0.009, "cardinality_error": 7.586, "loss_ce_0": 0.308, "loss_counter_0": 0.114, "loss_bbox_0": 0.024, "loss_giou_0": 0.285, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.586, "loss_caption_0": 2.222, "loss_caption": 2.223, "total_loss": 12.425}, "3724": {"loss_ce": 0.305, "loss_counter": 0.123, "loss_bbox": 0.023, "loss_giou": 0.265, "loss_self_iou": 0.009, "cardinality_error": 7.624, "loss_ce_0": 0.307, "loss_counter_0": 0.121, "loss_bbox_0": 0.024, "loss_giou_0": 0.279, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.624, "loss_caption_0": 2.38, "loss_caption": 2.368, "total_loss": 13.014}, "3857": {"loss_ce": 0.306, "loss_counter": 0.115, "loss_bbox": 0.021, "loss_giou": 0.264, "loss_self_iou": 0.009, "cardinality_error": 7.489, "loss_ce_0": 0.312, "loss_counter_0": 0.114, "loss_bbox_0": 0.023, "loss_giou_0": 0.279, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.489, "loss_caption_0": 2.343, "loss_caption": 2.344, "total_loss": 12.897}, "3990": {"loss_ce": 0.299, "loss_counter": 0.134, "loss_bbox": 0.02, "loss_giou": 0.268, "loss_self_iou": 0.012, "cardinality_error": 8.301, "loss_ce_0": 0.299, "loss_counter_0": 0.131, "loss_bbox_0": 0.022, "loss_giou_0": 0.289, "loss_self_iou_0": 0.013, "cardinality_error_0": 8.301, "loss_caption_0": 2.327, "loss_caption": 2.346, "total_loss": 12.9}, "4123": {"loss_ce": 0.305, "loss_counter": 0.129, "loss_bbox": 0.021, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.925, "loss_ce_0": 0.307, "loss_counter_0": 0.126, "loss_bbox_0": 0.023, "loss_giou_0": 0.275, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.925, "loss_caption_0": 2.272, "loss_caption": 2.28, "total_loss": 12.579}, "4256": {"loss_ce": 0.308, "loss_counter": 0.121, "loss_bbox": 0.02, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.632, "loss_ce_0": 0.31, "loss_counter_0": 0.12, "loss_bbox_0": 0.022, "loss_giou_0": 0.276, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.632, "loss_caption_0": 2.247, "loss_caption": 2.252, "total_loss": 12.484}, "4389": {"loss_ce": 0.305, "loss_counter": 0.12, "loss_bbox": 0.021, "loss_giou": 0.26, "loss_self_iou": 0.011, "cardinality_error": 7.526, "loss_ce_0": 0.309, "loss_counter_0": 0.119, "loss_bbox_0": 0.022, "loss_giou_0": 0.272, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.526, "loss_caption_0": 2.194, "loss_caption": 2.205, "total_loss": 12.273}, "4522": {"loss_ce": 0.305, "loss_counter": 0.115, "loss_bbox": 0.019, "loss_giou": 0.248, "loss_self_iou": 0.007, "cardinality_error": 7.519, "loss_ce_0": 0.303, "loss_counter_0": 0.113, "loss_bbox_0": 0.021, "loss_giou_0": 0.262, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.519, "loss_caption_0": 2.335, "loss_caption": 2.326, "total_loss": 12.689}, "4655": {"loss_ce": 0.297, "loss_counter": 0.122, "loss_bbox": 0.02, "loss_giou": 0.263, "loss_self_iou": 0.008, "cardinality_error": 7.97, "loss_ce_0": 0.298, "loss_counter_0": 0.121, "loss_bbox_0": 0.022, "loss_giou_0": 0.285, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.97, "loss_caption_0": 2.254, "loss_caption": 2.267, "total_loss": 12.545}, "4788": {"loss_ce": 0.308, "loss_counter": 0.118, "loss_bbox": 0.021, "loss_giou": 0.253, "loss_self_iou": 0.008, "cardinality_error": 7.481, "loss_ce_0": 0.308, "loss_counter_0": 0.118, "loss_bbox_0": 0.022, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.481, "loss_caption_0": 2.208, "loss_caption": 2.195, "total_loss": 12.24}, "4921": {"loss_ce": 0.306, "loss_counter": 0.12, 
"loss_bbox": 0.019, "loss_giou": 0.262, "loss_self_iou": 0.01, "cardinality_error": 7.842, "loss_ce_0": 0.305, "loss_counter_0": 0.119, "loss_bbox_0": 0.021, "loss_giou_0": 0.284, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.842, "loss_caption_0": 2.186, "loss_caption": 2.196, "total_loss": 12.289}, "5054": {"loss_ce": 0.303, "loss_counter": 0.121, "loss_bbox": 0.022, "loss_giou": 0.26, "loss_self_iou": 0.009, "cardinality_error": 7.887, "loss_ce_0": 0.305, "loss_counter_0": 0.12, "loss_bbox_0": 0.023, "loss_giou_0": 0.271, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.887, "loss_caption_0": 2.242, "loss_caption": 2.239, "total_loss": 12.422}, "5187": {"loss_ce": 0.303, "loss_counter": 0.124, "loss_bbox": 0.021, "loss_giou": 0.262, "loss_self_iou": 0.009, "cardinality_error": 7.932, "loss_ce_0": 0.305, "loss_counter_0": 0.123, "loss_bbox_0": 0.022, "loss_giou_0": 0.277, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.932, "loss_caption_0": 2.25, "loss_caption": 2.246, "total_loss": 12.483}, "5320": {"loss_ce": 0.299, "loss_counter": 0.12, "loss_bbox": 0.022, "loss_giou": 0.26, "loss_self_iou": 0.006, "cardinality_error": 7.729, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.024, "loss_giou_0": 0.279, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.729, "loss_caption_0": 2.287, "loss_caption": 2.298, "total_loss": 12.64}, "5453": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.022, "loss_giou": 0.25, "loss_self_iou": 0.011, "cardinality_error": 7.519, "loss_ce_0": 0.298, "loss_counter_0": 0.113, "loss_bbox_0": 0.023, "loss_giou_0": 0.269, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.519, "loss_caption_0": 2.175, "loss_caption": 2.176, "total_loss": 12.088}, "5586": {"loss_ce": 0.294, "loss_counter": 0.12, "loss_bbox": 0.018, "loss_giou": 0.252, "loss_self_iou": 0.007, "cardinality_error": 7.662, "loss_ce_0": 0.292, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.274, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.662, "loss_caption_0": 2.16, "loss_caption": 2.132, "total_loss": 11.979}, "5719": {"loss_ce": 0.305, "loss_counter": 0.13, "loss_bbox": 0.02, "loss_giou": 0.255, "loss_self_iou": 0.008, "cardinality_error": 8.451, "loss_ce_0": 0.302, "loss_counter_0": 0.127, "loss_bbox_0": 0.021, "loss_giou_0": 0.273, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.451, "loss_caption_0": 2.166, "loss_caption": 2.164, "total_loss": 12.113}, "5852": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.246, "loss_self_iou": 0.007, "cardinality_error": 7.835, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.02, "loss_giou_0": 0.267, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.835, "loss_caption_0": 2.122, "loss_caption": 2.111, "total_loss": 11.841}, "5985": {"loss_ce": 0.304, "loss_counter": 0.122, "loss_bbox": 0.02, "loss_giou": 0.243, "loss_self_iou": 0.009, "cardinality_error": 7.474, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.022, "loss_giou_0": 0.263, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.474, "loss_caption_0": 2.149, "loss_caption": 2.14, "total_loss": 11.926}, "6118": {"loss_ce": 0.3, "loss_counter": 0.113, "loss_bbox": 0.018, "loss_giou": 0.241, "loss_self_iou": 0.008, "cardinality_error": 7.639, "loss_ce_0": 0.302, "loss_counter_0": 0.112, "loss_bbox_0": 0.019, "loss_giou_0": 0.259, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.639, "loss_caption_0": 2.235, "loss_caption": 2.215, "total_loss": 12.218}, "6251": {"loss_ce": 0.301, "loss_counter": 
0.125, "loss_bbox": 0.02, "loss_giou": 0.251, "loss_self_iou": 0.007, "cardinality_error": 7.857, "loss_ce_0": 0.301, "loss_counter_0": 0.125, "loss_bbox_0": 0.022, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.857, "loss_caption_0": 2.235, "loss_caption": 2.226, "total_loss": 12.328}, "6384": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.02, "loss_giou": 0.246, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.301, "loss_counter_0": 0.124, "loss_bbox_0": 0.021, "loss_giou_0": 0.265, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.82, "loss_caption_0": 2.208, "loss_caption": 2.183, "total_loss": 12.157}, "6517": {"loss_ce": 0.297, "loss_counter": 0.12, "loss_bbox": 0.02, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.872, "loss_ce_0": 0.295, "loss_counter_0": 0.118, "loss_bbox_0": 0.022, "loss_giou_0": 0.271, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.872, "loss_caption_0": 2.135, "loss_caption": 2.155, "total_loss": 11.99}, "6650": {"loss_ce": 0.297, "loss_counter": 0.112, "loss_bbox": 0.021, "loss_giou": 0.244, "loss_self_iou": 0.008, "cardinality_error": 7.398, "loss_ce_0": 0.297, "loss_counter_0": 0.112, "loss_bbox_0": 0.023, "loss_giou_0": 0.26, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.398, "loss_caption_0": 2.205, "loss_caption": 2.202, "total_loss": 12.127}, "6783": {"loss_ce": 0.29, "loss_counter": 0.117, "loss_bbox": 0.019, "loss_giou": 0.24, "loss_self_iou": 0.007, "cardinality_error": 7.586, "loss_ce_0": 0.29, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.257, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.586, "loss_caption_0": 2.02, "loss_caption": 2.014, "total_loss": 11.332}, "6916": {"loss_ce": 0.301, "loss_counter": 0.118, "loss_bbox": 0.021, "loss_giou": 0.249, "loss_self_iou": 0.008, "cardinality_error": 7.519, "loss_ce_0": 0.302, "loss_counter_0": 0.116, "loss_bbox_0": 0.023, "loss_giou_0": 0.264, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.519, "loss_caption_0": 2.118, "loss_caption": 2.101, "total_loss": 11.817}, "7049": {"loss_ce": 0.294, "loss_counter": 0.119, "loss_bbox": 0.019, "loss_giou": 0.25, "loss_self_iou": 0.007, "cardinality_error": 7.699, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.02, "loss_giou_0": 0.265, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.699, "loss_caption_0": 2.105, "loss_caption": 2.111, "total_loss": 11.78}, "7182": {"loss_ce": 0.29, "loss_counter": 0.115, "loss_bbox": 0.021, "loss_giou": 0.242, "loss_self_iou": 0.008, "cardinality_error": 7.594, "loss_ce_0": 0.288, "loss_counter_0": 0.115, "loss_bbox_0": 0.022, "loss_giou_0": 0.257, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.594, "loss_caption_0": 2.194, "loss_caption": 2.195, "total_loss": 12.045}, "7315": {"loss_ce": 0.29, "loss_counter": 0.123, "loss_bbox": 0.02, "loss_giou": 0.254, "loss_self_iou": 0.009, "cardinality_error": 8.301, "loss_ce_0": 0.291, "loss_counter_0": 0.123, "loss_bbox_0": 0.02, "loss_giou_0": 0.268, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.301, "loss_caption_0": 2.096, "loss_caption": 2.09, "total_loss": 11.741}, "7448": {"loss_ce": 0.296, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.234, "loss_self_iou": 0.006, "cardinality_error": 7.677, "loss_ce_0": 0.292, "loss_counter_0": 0.12, "loss_bbox_0": 0.02, "loss_giou_0": 0.251, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.677, "loss_caption_0": 2.076, "loss_caption": 2.063, "total_loss": 11.513}, "7581": {"loss_ce": 0.298, 
"loss_counter": 0.116, "loss_bbox": 0.019, "loss_giou": 0.238, "loss_self_iou": 0.008, "cardinality_error": 7.534, "loss_ce_0": 0.295, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.253, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.534, "loss_caption_0": 2.114, "loss_caption": 2.112, "total_loss": 11.718}, "7714": {"loss_ce": 0.295, "loss_counter": 0.117, "loss_bbox": 0.018, "loss_giou": 0.235, "loss_self_iou": 0.008, "cardinality_error": 7.677, "loss_ce_0": 0.291, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.253, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.677, "loss_caption_0": 2.167, "loss_caption": 2.179, "total_loss": 11.932}, "7847": {"loss_ce": 0.293, "loss_counter": 0.118, "loss_bbox": 0.019, "loss_giou": 0.252, "loss_self_iou": 0.009, "cardinality_error": 8.053, "loss_ce_0": 0.289, "loss_counter_0": 0.117, "loss_bbox_0": 0.021, "loss_giou_0": 0.269, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.053, "loss_caption_0": 2.106, "loss_caption": 2.115, "total_loss": 11.804}, "7980": {"loss_ce": 0.3, "loss_counter": 0.118, "loss_bbox": 0.019, "loss_giou": 0.249, "loss_self_iou": 0.007, "cardinality_error": 7.902, "loss_ce_0": 0.295, "loss_counter_0": 0.117, "loss_bbox_0": 0.021, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.902, "loss_caption_0": 2.151, "loss_caption": 2.153, "total_loss": 11.979}, "8113": {"loss_ce": 0.297, "loss_counter": 0.114, "loss_bbox": 0.019, "loss_giou": 0.236, "loss_self_iou": 0.008, "cardinality_error": 7.617, "loss_ce_0": 0.295, "loss_counter_0": 0.112, "loss_bbox_0": 0.021, "loss_giou_0": 0.257, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.617, "loss_caption_0": 2.036, "loss_caption": 2.044, "total_loss": 11.427}, "8246": {"loss_ce": 0.286, "loss_counter": 0.119, "loss_bbox": 0.019, "loss_giou": 0.237, "loss_self_iou": 0.006, "cardinality_error": 7.827, "loss_ce_0": 0.283, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.257, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.827, "loss_caption_0": 2.055, "loss_caption": 2.057, "total_loss": 11.458}, "8379": {"loss_ce": 0.29, "loss_counter": 0.118, "loss_bbox": 0.018, "loss_giou": 0.225, "loss_self_iou": 0.005, "cardinality_error": 7.82, "loss_ce_0": 0.286, "loss_counter_0": 0.117, "loss_bbox_0": 0.019, "loss_giou_0": 0.246, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 2.046, "loss_caption": 2.041, "total_loss": 11.331}, "8512": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.228, "loss_self_iou": 0.006, "cardinality_error": 7.654, "loss_ce_0": 0.283, "loss_counter_0": 0.114, "loss_bbox_0": 0.019, "loss_giou_0": 0.245, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.654, "loss_caption_0": 1.991, "loss_caption": 1.997, "total_loss": 11.118}, "8645": {"loss_ce": 0.29, "loss_counter": 0.115, "loss_bbox": 0.02, "loss_giou": 0.251, "loss_self_iou": 0.007, "cardinality_error": 8.068, "loss_ce_0": 0.287, "loss_counter_0": 0.115, "loss_bbox_0": 0.022, "loss_giou_0": 0.265, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.068, "loss_caption_0": 2.094, "loss_caption": 2.097, "total_loss": 11.714}, "8778": {"loss_ce": 0.288, "loss_counter": 0.121, "loss_bbox": 0.019, "loss_giou": 0.24, "loss_self_iou": 0.008, "cardinality_error": 8.008, "loss_ce_0": 0.286, "loss_counter_0": 0.121, "loss_bbox_0": 0.02, "loss_giou_0": 0.258, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.008, "loss_caption_0": 2.092, "loss_caption": 2.092, "total_loss": 11.63}, "8911": 
{"loss_ce": 0.298, "loss_counter": 0.114, "loss_bbox": 0.019, "loss_giou": 0.235, "loss_self_iou": 0.008, "cardinality_error": 7.338, "loss_ce_0": 0.297, "loss_counter_0": 0.114, "loss_bbox_0": 0.02, "loss_giou_0": 0.248, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.338, "loss_caption_0": 2.051, "loss_caption": 2.054, "total_loss": 11.446}, "9044": {"loss_ce": 0.292, "loss_counter": 0.105, "loss_bbox": 0.02, "loss_giou": 0.227, "loss_self_iou": 0.008, "cardinality_error": 7.226, "loss_ce_0": 0.292, "loss_counter_0": 0.105, "loss_bbox_0": 0.021, "loss_giou_0": 0.243, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.226, "loss_caption_0": 2.08, "loss_caption": 2.084, "total_loss": 11.478}, "9177": {"loss_ce": 0.291, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.254, "loss_self_iou": 0.007, "cardinality_error": 7.977, "loss_ce_0": 0.288, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.275, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.977, "loss_caption_0": 2.046, "loss_caption": 2.031, "total_loss": 11.546}, "9310": {"loss_ce": 0.28, "loss_counter": 0.117, "loss_bbox": 0.018, "loss_giou": 0.236, "loss_self_iou": 0.006, "cardinality_error": 7.97, "loss_ce_0": 0.281, "loss_counter_0": 0.118, "loss_bbox_0": 0.019, "loss_giou_0": 0.252, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.97, "loss_caption_0": 1.986, "loss_caption": 1.995, "total_loss": 11.157}, "9443": {"loss_ce": 0.294, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.226, "loss_self_iou": 0.006, "cardinality_error": 7.617, "loss_ce_0": 0.292, "loss_counter_0": 0.114, "loss_bbox_0": 0.019, "loss_giou_0": 0.239, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.617, "loss_caption_0": 2.065, "loss_caption": 2.061, "total_loss": 11.394}, "9576": {"loss_ce": 0.285, "loss_counter": 0.119, "loss_bbox": 0.02, "loss_giou": 0.231, "loss_self_iou": 0.006, "cardinality_error": 7.917, "loss_ce_0": 0.284, "loss_counter_0": 0.119, "loss_bbox_0": 0.021, "loss_giou_0": 0.252, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.917, "loss_caption_0": 1.977, "loss_caption": 1.974, "total_loss": 11.093}, "9709": {"loss_ce": 0.291, "loss_counter": 0.117, "loss_bbox": 0.016, "loss_giou": 0.224, "loss_self_iou": 0.006, "cardinality_error": 8.098, "loss_ce_0": 0.29, "loss_counter_0": 0.117, "loss_bbox_0": 0.018, "loss_giou_0": 0.242, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.098, "loss_caption_0": 2.051, "loss_caption": 2.063, "total_loss": 11.373}, "9842": {"loss_ce": 0.288, "loss_counter": 0.11, "loss_bbox": 0.018, "loss_giou": 0.242, "loss_self_iou": 0.007, "cardinality_error": 7.662, "loss_ce_0": 0.286, "loss_counter_0": 0.11, "loss_bbox_0": 0.02, "loss_giou_0": 0.262, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.662, "loss_caption_0": 1.939, "loss_caption": 1.953, "total_loss": 11.058}, "9975": {"loss_ce": 0.28, "loss_counter": 0.116, "loss_bbox": 0.017, "loss_giou": 0.238, "loss_self_iou": 0.006, "cardinality_error": 8.233, "loss_ce_0": 0.281, "loss_counter_0": 0.116, "loss_bbox_0": 0.018, "loss_giou_0": 0.255, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.233, "loss_caption_0": 2.024, "loss_caption": 2.026, "total_loss": 11.31}, "10108": {"loss_ce": 0.283, "loss_counter": 0.111, "loss_bbox": 0.018, "loss_giou": 0.232, "loss_self_iou": 0.006, "cardinality_error": 7.466, "loss_ce_0": 0.279, "loss_counter_0": 0.112, "loss_bbox_0": 0.02, "loss_giou_0": 0.246, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.466, "loss_caption_0": 1.878, "loss_caption": 1.882, "total_loss": 
10.667}, "10241": {"loss_ce": 0.285, "loss_counter": 0.119, "loss_bbox": 0.018, "loss_giou": 0.24, "loss_self_iou": 0.007, "cardinality_error": 7.722, "loss_ce_0": 0.282, "loss_counter_0": 0.119, "loss_bbox_0": 0.019, "loss_giou_0": 0.253, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.722, "loss_caption_0": 1.984, "loss_caption": 1.988, "total_loss": 11.165}, "10374": {"loss_ce": 0.292, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.225, "loss_self_iou": 0.007, "cardinality_error": 7.692, "loss_ce_0": 0.285, "loss_counter_0": 0.112, "loss_bbox_0": 0.019, "loss_giou_0": 0.241, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.692, "loss_caption_0": 2.089, "loss_caption": 2.094, "total_loss": 11.498}, "10507": {"loss_ce": 0.287, "loss_counter": 0.113, "loss_bbox": 0.019, "loss_giou": 0.22, "loss_self_iou": 0.007, "cardinality_error": 7.564, "loss_ce_0": 0.283, "loss_counter_0": 0.113, "loss_bbox_0": 0.021, "loss_giou_0": 0.241, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.564, "loss_caption_0": 1.936, "loss_caption": 1.935, "total_loss": 10.84}, "10640": {"loss_ce": 0.281, "loss_counter": 0.115, "loss_bbox": 0.02, "loss_giou": 0.232, "loss_self_iou": 0.008, "cardinality_error": 7.549, "loss_ce_0": 0.278, "loss_counter_0": 0.116, "loss_bbox_0": 0.022, "loss_giou_0": 0.249, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.549, "loss_caption_0": 2.041, "loss_caption": 2.042, "total_loss": 11.323}, "10773": {"loss_ce": 0.279, "loss_counter": 0.114, "loss_bbox": 0.017, "loss_giou": 0.235, "loss_self_iou": 0.006, "cardinality_error": 7.94, "loss_ce_0": 0.278, "loss_counter_0": 0.115, "loss_bbox_0": 0.018, "loss_giou_0": 0.253, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.94, "loss_caption_0": 1.851, "loss_caption": 1.84, "total_loss": 10.561}, "10906": {"loss_ce": 0.279, "loss_counter": 0.109, "loss_bbox": 0.017, "loss_giou": 0.215, "loss_self_iou": 0.006, "cardinality_error": 7.218, "loss_ce_0": 0.278, "loss_counter_0": 0.109, "loss_bbox_0": 0.018, "loss_giou_0": 0.231, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.218, "loss_caption_0": 1.945, "loss_caption": 1.948, "total_loss": 10.791}, "11039": {"loss_ce": 0.288, "loss_counter": 0.108, "loss_bbox": 0.017, "loss_giou": 0.207, "loss_self_iou": 0.006, "cardinality_error": 7.579, "loss_ce_0": 0.283, "loss_counter_0": 0.109, "loss_bbox_0": 0.018, "loss_giou_0": 0.223, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.579, "loss_caption_0": 1.92, "loss_caption": 1.927, "total_loss": 10.664}, "11172": {"loss_ce": 0.28, "loss_counter": 0.11, "loss_bbox": 0.018, "loss_giou": 0.215, "loss_self_iou": 0.006, "cardinality_error": 7.451, "loss_ce_0": 0.279, "loss_counter_0": 0.11, "loss_bbox_0": 0.019, "loss_giou_0": 0.231, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.451, "loss_caption_0": 1.91, "loss_caption": 1.9, "total_loss": 10.635}, "11305": {"loss_ce": 0.278, "loss_counter": 0.125, "loss_bbox": 0.017, "loss_giou": 0.233, "loss_self_iou": 0.006, "cardinality_error": 8.09, "loss_ce_0": 0.276, "loss_counter_0": 0.126, "loss_bbox_0": 0.018, "loss_giou_0": 0.244, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.09, "loss_caption_0": 1.876, "loss_caption": 1.877, "total_loss": 10.648}, "11438": {"loss_ce": 0.273, "loss_counter": 0.113, "loss_bbox": 0.016, "loss_giou": 0.211, "loss_self_iou": 0.005, "cardinality_error": 7.744, "loss_ce_0": 0.269, "loss_counter_0": 0.113, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.744, "loss_caption_0": 1.981, "loss_caption": 
1.968, "total_loss": 10.865}, "11571": {"loss_ce": 0.281, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.225, "loss_self_iou": 0.006, "cardinality_error": 7.699, "loss_ce_0": 0.277, "loss_counter_0": 0.115, "loss_bbox_0": 0.02, "loss_giou_0": 0.243, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.699, "loss_caption_0": 1.833, "loss_caption": 1.846, "total_loss": 10.461}, "11704": {"loss_ce": 0.28, "loss_counter": 0.115, "loss_bbox": 0.017, "loss_giou": 0.21, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.278, "loss_counter_0": 0.116, "loss_bbox_0": 0.017, "loss_giou_0": 0.226, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 1.91, "loss_caption": 1.915, "total_loss": 10.628}, "11837": {"loss_ce": 0.271, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.215, "loss_self_iou": 0.007, "cardinality_error": 8.0, "loss_ce_0": 0.273, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.23, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.0, "loss_caption_0": 1.936, "loss_caption": 1.939, "total_loss": 10.726}, "11970": {"loss_ce": 0.272, "loss_counter": 0.115, "loss_bbox": 0.017, "loss_giou": 0.22, "loss_self_iou": 0.006, "cardinality_error": 8.158, "loss_ce_0": 0.27, "loss_counter_0": 0.116, "loss_bbox_0": 0.018, "loss_giou_0": 0.242, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.158, "loss_caption_0": 1.953, "loss_caption": 1.962, "total_loss": 10.881}, "12103": {"loss_ce": 0.275, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.216, "loss_self_iou": 0.006, "cardinality_error": 8.038, "loss_ce_0": 0.274, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.038, "loss_caption_0": 1.832, "loss_caption": 1.845, "total_loss": 10.35}, "12236": {"loss_ce": 0.272, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.206, "loss_self_iou": 0.005, "cardinality_error": 7.812, "loss_ce_0": 0.266, "loss_counter_0": 0.111, "loss_bbox_0": 0.018, "loss_giou_0": 0.223, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.812, "loss_caption_0": 1.968, "loss_caption": 1.959, "total_loss": 10.757}, "12369": {"loss_ce": 0.273, "loss_counter": 0.118, "loss_bbox": 0.016, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.827, "loss_ce_0": 0.27, "loss_counter_0": 0.118, "loss_bbox_0": 0.017, "loss_giou_0": 0.226, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.827, "loss_caption_0": 1.89, "loss_caption": 1.903, "total_loss": 10.534}, "12502": {"loss_ce": 0.27, "loss_counter": 0.108, "loss_bbox": 0.016, "loss_giou": 0.205, "loss_self_iou": 0.006, "cardinality_error": 7.684, "loss_ce_0": 0.268, "loss_counter_0": 0.108, "loss_bbox_0": 0.017, "loss_giou_0": 0.224, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.684, "loss_caption_0": 1.903, "loss_caption": 1.905, "total_loss": 10.519}, "12635": {"loss_ce": 0.27, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.218, "loss_self_iou": 0.005, "cardinality_error": 7.947, "loss_ce_0": 0.269, "loss_counter_0": 0.112, "loss_bbox_0": 0.016, "loss_giou_0": 0.232, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.947, "loss_caption_0": 1.822, "loss_caption": 1.826, "total_loss": 10.284}, "12768": {"loss_ce": 0.277, "loss_counter": 0.111, "loss_bbox": 0.017, "loss_giou": 0.219, "loss_self_iou": 0.008, "cardinality_error": 7.669, "loss_ce_0": 0.276, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.235, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.669, "loss_caption_0": 1.905, 
"loss_caption": 1.909, "total_loss": 10.662}, "12901": {"loss_ce": 0.269, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.005, "cardinality_error": 7.639, "loss_ce_0": 0.267, "loss_counter_0": 0.108, "loss_bbox_0": 0.017, "loss_giou_0": 0.224, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.639, "loss_caption_0": 1.856, "loss_caption": 1.863, "total_loss": 10.344}, "13034": {"loss_ce": 0.273, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.216, "loss_self_iou": 0.005, "cardinality_error": 7.85, "loss_ce_0": 0.274, "loss_counter_0": 0.113, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.841, "loss_caption": 1.841, "total_loss": 10.356}, "13167": {"loss_ce": 0.275, "loss_counter": 0.109, "loss_bbox": 0.018, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.406, "loss_ce_0": 0.273, "loss_counter_0": 0.109, "loss_bbox_0": 0.019, "loss_giou_0": 0.226, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.406, "loss_caption_0": 1.931, "loss_caption": 1.927, "total_loss": 10.663}, "13300": {"loss_ce": 0.274, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.212, "loss_self_iou": 0.005, "cardinality_error": 7.737, "loss_ce_0": 0.272, "loss_counter_0": 0.113, "loss_bbox_0": 0.019, "loss_giou_0": 0.23, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.737, "loss_caption_0": 1.853, "loss_caption": 1.849, "total_loss": 10.379}, "13433": {"loss_ce": 0.271, "loss_counter": 0.112, "loss_bbox": 0.017, "loss_giou": 0.217, "loss_self_iou": 0.006, "cardinality_error": 7.835, "loss_ce_0": 0.267, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.235, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.835, "loss_caption_0": 1.804, "loss_caption": 1.811, "total_loss": 10.223}, "13566": {"loss_ce": 0.266, "loss_counter": 0.116, "loss_bbox": 0.015, "loss_giou": 0.204, "loss_self_iou": 0.005, "cardinality_error": 7.774, "loss_ce_0": 0.266, "loss_counter_0": 0.116, "loss_bbox_0": 0.017, "loss_giou_0": 0.221, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.774, "loss_caption_0": 1.884, "loss_caption": 1.887, "total_loss": 10.42}, "13699": {"loss_ce": 0.261, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.201, "loss_self_iou": 0.006, "cardinality_error": 7.729, "loss_ce_0": 0.259, "loss_counter_0": 0.11, "loss_bbox_0": 0.017, "loss_giou_0": 0.218, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.729, "loss_caption_0": 1.823, "loss_caption": 1.806, "total_loss": 10.083}, "13832": {"loss_ce": 0.269, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.211, "loss_self_iou": 0.005, "cardinality_error": 7.699, "loss_ce_0": 0.271, "loss_counter_0": 0.112, "loss_bbox_0": 0.017, "loss_giou_0": 0.228, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.699, "loss_caption_0": 1.855, "loss_caption": 1.857, "total_loss": 10.374}, "13965": {"loss_ce": 0.275, "loss_counter": 0.105, "loss_bbox": 0.016, "loss_giou": 0.196, "loss_self_iou": 0.006, "cardinality_error": 7.128, "loss_ce_0": 0.271, "loss_counter_0": 0.106, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.128, "loss_caption_0": 1.809, "loss_caption": 1.8, "total_loss": 10.055}, "14098": {"loss_ce": 0.273, "loss_counter": 0.112, "loss_bbox": 0.016, "loss_giou": 0.213, "loss_self_iou": 0.007, "cardinality_error": 7.925, "loss_ce_0": 0.273, "loss_counter_0": 0.113, "loss_bbox_0": 0.018, "loss_giou_0": 0.23, "loss_self_iou_0": 0.008, "cardinality_error_0": 
7.925, "loss_caption_0": 1.863, "loss_caption": 1.863, "total_loss": 10.433}, "14231": {"loss_ce": 0.261, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.212, "loss_self_iou": 0.007, "cardinality_error": 7.82, "loss_ce_0": 0.262, "loss_counter_0": 0.114, "loss_bbox_0": 0.018, "loss_giou_0": 0.222, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.82, "loss_caption_0": 1.936, "loss_caption": 1.929, "total_loss": 10.624}, "14364": {"loss_ce": 0.263, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.216, "loss_self_iou": 0.005, "cardinality_error": 7.744, "loss_ce_0": 0.263, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.227, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.744, "loss_caption_0": 1.757, "loss_caption": 1.754, "total_loss": 9.948}, "14497": {"loss_ce": 0.266, "loss_counter": 0.11, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.827, "loss_ce_0": 0.265, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.827, "loss_caption_0": 1.896, "loss_caption": 1.894, "total_loss": 10.407}, "14630": {"loss_ce": 0.263, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.005, "cardinality_error": 7.925, "loss_ce_0": 0.261, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.224, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.925, "loss_caption_0": 1.84, "loss_caption": 1.842, "total_loss": 10.253}, "14763": {"loss_ce": 0.266, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.006, "cardinality_error": 7.85, "loss_ce_0": 0.264, "loss_counter_0": 0.111, "loss_bbox_0": 0.016, "loss_giou_0": 0.225, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.87, "loss_caption": 1.877, "total_loss": 10.398}, "14896": {"loss_ce": 0.26, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.692, "loss_ce_0": 0.259, "loss_counter_0": 0.112, "loss_bbox_0": 0.016, "loss_giou_0": 0.217, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.692, "loss_caption_0": 1.796, "loss_caption": 1.784, "total_loss": 9.979}, "15029": {"loss_ce": 0.264, "loss_counter": 0.103, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.006, "cardinality_error": 7.414, "loss_ce_0": 0.264, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.414, "loss_caption_0": 1.763, "loss_caption": 1.767, "total_loss": 9.842}, "15162": {"loss_ce": 0.263, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.196, "loss_self_iou": 0.004, "cardinality_error": 7.767, "loss_ce_0": 0.262, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.767, "loss_caption_0": 1.781, "loss_caption": 1.781, "total_loss": 9.916}, "15295": {"loss_ce": 0.257, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.662, "loss_ce_0": 0.255, "loss_counter_0": 0.108, "loss_bbox_0": 0.016, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.662, "loss_caption_0": 1.735, "loss_caption": 1.75, "total_loss": 9.755}, "15428": {"loss_ce": 0.258, "loss_counter": 0.114, "loss_bbox": 0.015, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.992, "loss_ce_0": 0.261, "loss_counter_0": 0.114, "loss_bbox_0": 0.016, "loss_giou_0": 0.22, "loss_self_iou_0": 0.005, 
"cardinality_error_0": 7.992, "loss_caption_0": 1.852, "loss_caption": 1.86, "total_loss": 10.298}, "15561": {"loss_ce": 0.256, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.204, "loss_self_iou": 0.006, "cardinality_error": 8.068, "loss_ce_0": 0.257, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.218, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.068, "loss_caption_0": 1.878, "loss_caption": 1.866, "total_loss": 10.314}, "15694": {"loss_ce": 0.256, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.202, "loss_self_iou": 0.004, "cardinality_error": 7.647, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.016, "loss_giou_0": 0.215, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.647, "loss_caption_0": 1.7, "loss_caption": 1.684, "total_loss": 9.569}, "15827": {"loss_ce": 0.259, "loss_counter": 0.104, "loss_bbox": 0.016, "loss_giou": 0.194, "loss_self_iou": 0.005, "cardinality_error": 7.722, "loss_ce_0": 0.257, "loss_counter_0": 0.105, "loss_bbox_0": 0.017, "loss_giou_0": 0.209, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.722, "loss_caption_0": 1.848, "loss_caption": 1.839, "total_loss": 10.119}, "15960": {"loss_ce": 0.26, "loss_counter": 0.107, "loss_bbox": 0.015, "loss_giou": 0.197, "loss_self_iou": 0.004, "cardinality_error": 7.609, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.609, "loss_caption_0": 1.847, "loss_caption": 1.858, "total_loss": 10.198}, "16093": {"loss_ce": 0.257, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.258, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.773, "loss_caption": 1.769, "total_loss": 9.789}, "16226": {"loss_ce": 0.26, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.004, "cardinality_error": 7.805, "loss_ce_0": 0.259, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.215, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.805, "loss_caption_0": 1.743, "loss_caption": 1.749, "total_loss": 9.786}, "16359": {"loss_ce": 0.265, "loss_counter": 0.116, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.85, "loss_ce_0": 0.264, "loss_counter_0": 0.115, "loss_bbox_0": 0.015, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.797, "loss_caption": 1.778, "total_loss": 9.972}, "16492": {"loss_ce": 0.254, "loss_counter": 0.105, "loss_bbox": 0.015, "loss_giou": 0.189, "loss_self_iou": 0.004, "cardinality_error": 7.383, "loss_ce_0": 0.257, "loss_counter_0": 0.106, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.383, "loss_caption_0": 1.796, "loss_caption": 1.808, "total_loss": 9.899}, "16625": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.005, "cardinality_error": 7.782, "loss_ce_0": 0.256, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.203, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.782, "loss_caption_0": 1.78, "loss_caption": 1.779, "total_loss": 9.812}, "16758": {"loss_ce": 0.252, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.196, "loss_self_iou": 0.005, "cardinality_error": 7.962, "loss_ce_0": 0.252, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.211, 
"loss_self_iou_0": 0.005, "cardinality_error_0": 7.962, "loss_caption_0": 1.795, "loss_caption": 1.806, "total_loss": 9.948}, "16891": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.016, "loss_giou": 0.199, "loss_self_iou": 0.005, "cardinality_error": 7.797, "loss_ce_0": 0.255, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.797, "loss_caption_0": 1.788, "loss_caption": 1.782, "total_loss": 9.914}, "17024": {"loss_ce": 0.262, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.511, "loss_ce_0": 0.26, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.511, "loss_caption_0": 1.717, "loss_caption": 1.72, "total_loss": 9.666}, "17157": {"loss_ce": 0.25, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.189, "loss_self_iou": 0.004, "cardinality_error": 7.692, "loss_ce_0": 0.252, "loss_counter_0": 0.106, "loss_bbox_0": 0.016, "loss_giou_0": 0.2, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.738, "loss_caption": 1.749, "total_loss": 9.638}, "17290": {"loss_ce": 0.254, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.932, "loss_ce_0": 0.254, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.932, "loss_caption_0": 1.815, "loss_caption": 1.83, "total_loss": 10.067}, "17423": {"loss_ce": 0.262, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.007, "cardinality_error": 7.692, "loss_ce_0": 0.259, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.208, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.692, "loss_caption_0": 1.865, "loss_caption": 1.881, "total_loss": 10.261}, "17556": {"loss_ce": 0.252, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.193, "loss_self_iou": 0.004, "cardinality_error": 7.737, "loss_ce_0": 0.253, "loss_counter_0": 0.112, "loss_bbox_0": 0.015, "loss_giou_0": 0.209, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.737, "loss_caption_0": 1.744, "loss_caption": 1.743, "total_loss": 9.707}, "17689": {"loss_ce": 0.259, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.199, "loss_self_iou": 0.006, "cardinality_error": 7.602, "loss_ce_0": 0.262, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.602, "loss_caption_0": 1.835, "loss_caption": 1.819, "total_loss": 10.1}, "17822": {"loss_ce": 0.25, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.191, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.249, "loss_counter_0": 0.108, "loss_bbox_0": 0.016, "loss_giou_0": 0.206, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.681, "loss_caption": 1.67, "total_loss": 9.397}, "17955": {"loss_ce": 0.255, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.184, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.252, "loss_counter_0": 0.104, "loss_bbox_0": 0.015, "loss_giou_0": 0.2, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.757, "loss_caption": 1.745, "total_loss": 9.658}, "18088": {"loss_ce": 0.251, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.534, "loss_ce_0": 0.251, "loss_counter_0": 0.107, "loss_bbox_0": 0.016, 
"loss_giou_0": 0.191, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.534, "loss_caption_0": 1.703, "loss_caption": 1.701, "total_loss": 9.39}, "18221": {"loss_ce": 0.252, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.201, "loss_self_iou": 0.005, "cardinality_error": 8.211, "loss_ce_0": 0.252, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.213, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.211, "loss_caption_0": 1.824, "loss_caption": 1.816, "total_loss": 10.053}, "18354": {"loss_ce": 0.253, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.004, "cardinality_error": 7.789, "loss_ce_0": 0.249, "loss_counter_0": 0.105, "loss_bbox_0": 0.016, "loss_giou_0": 0.21, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.789, "loss_caption_0": 1.792, "loss_caption": 1.779, "total_loss": 9.874}, "18487": {"loss_ce": 0.255, "loss_counter": 0.111, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.251, "loss_counter_0": 0.112, "loss_bbox_0": 0.014, "loss_giou_0": 0.205, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.826, "loss_caption": 1.81, "total_loss": 9.979}, "18620": {"loss_ce": 0.251, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.193, "loss_self_iou": 0.003, "cardinality_error": 7.737, "loss_ce_0": 0.251, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.206, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.737, "loss_caption_0": 1.767, "loss_caption": 1.771, "total_loss": 9.784}, "18753": {"loss_ce": 0.247, "loss_counter": 0.115, "loss_bbox": 0.013, "loss_giou": 0.195, "loss_self_iou": 0.004, "cardinality_error": 8.241, "loss_ce_0": 0.251, "loss_counter_0": 0.115, "loss_bbox_0": 0.014, "loss_giou_0": 0.207, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.241, "loss_caption_0": 1.758, "loss_caption": 1.759, "total_loss": 9.756}, "18886": {"loss_ce": 0.247, "loss_counter": 0.103, "loss_bbox": 0.015, "loss_giou": 0.182, "loss_self_iou": 0.004, "cardinality_error": 7.436, "loss_ce_0": 0.245, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.194, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.436, "loss_caption_0": 1.696, "loss_caption": 1.692, "total_loss": 9.366}, "19019": {"loss_ce": 0.243, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.003, "cardinality_error": 7.692, "loss_ce_0": 0.242, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.73, "loss_caption": 1.729, "total_loss": 9.496}, "19152": {"loss_ce": 0.251, "loss_counter": 0.112, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.251, "loss_counter_0": 0.114, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 1.779, "loss_caption": 1.771, "total_loss": 9.714}, "19285": {"loss_ce": 0.25, "loss_counter": 0.105, "loss_bbox": 0.014, "loss_giou": 0.194, "loss_self_iou": 0.004, "cardinality_error": 7.669, "loss_ce_0": 0.25, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.204, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.669, "loss_caption_0": 1.76, "loss_caption": 1.772, "total_loss": 9.759}, "19418": {"loss_ce": 0.244, "loss_counter": 0.115, "loss_bbox": 0.013, "loss_giou": 0.197, "loss_self_iou": 0.004, "cardinality_error": 8.256, "loss_ce_0": 0.245, "loss_counter_0": 0.117, 
"loss_bbox_0": 0.014, "loss_giou_0": 0.211, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.256, "loss_caption_0": 1.754, "loss_caption": 1.758, "total_loss": 9.747}, "19551": {"loss_ce": 0.249, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.865, "loss_ce_0": 0.253, "loss_counter_0": 0.11, "loss_bbox_0": 0.014, "loss_giou_0": 0.187, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.865, "loss_caption_0": 1.68, "loss_caption": 1.689, "total_loss": 9.3}, "19684": {"loss_ce": 0.263, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.187, "loss_self_iou": 0.005, "cardinality_error": 7.474, "loss_ce_0": 0.262, "loss_counter_0": 0.105, "loss_bbox_0": 0.016, "loss_giou_0": 0.199, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.474, "loss_caption_0": 1.81, "loss_caption": 1.803, "total_loss": 9.923}, "19817": {"loss_ce": 0.246, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.183, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.247, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.769, "loss_caption": 1.765, "total_loss": 9.677}, "19950": {"loss_ce": 0.254, "loss_counter": 0.108, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 7.797, "loss_ce_0": 0.253, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.797, "loss_caption_0": 1.736, "loss_caption": 1.748, "total_loss": 9.654}, "20083": {"loss_ce": 0.254, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.519, "loss_ce_0": 0.257, "loss_counter_0": 0.103, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.519, "loss_caption_0": 1.743, "loss_caption": 1.756, "total_loss": 9.655}, "20216": {"loss_ce": 0.244, "loss_counter": 0.105, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.003, "cardinality_error": 7.759, "loss_ce_0": 0.244, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.759, "loss_caption_0": 1.79, "loss_caption": 1.781, "total_loss": 9.713}, "20349": {"loss_ce": 0.246, "loss_counter": 0.11, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.245, "loss_counter_0": 0.113, "loss_bbox_0": 0.014, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.992, "loss_caption_0": 1.749, "loss_caption": 1.759, "total_loss": 9.675}, "20482": {"loss_ce": 0.244, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.193, "loss_self_iou": 0.005, "cardinality_error": 7.94, "loss_ce_0": 0.244, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.207, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.94, "loss_caption_0": 1.694, "loss_caption": 1.715, "total_loss": 9.502}, "20615": {"loss_ce": 0.257, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.188, "loss_self_iou": 0.005, "cardinality_error": 7.368, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.368, "loss_caption_0": 1.77, "loss_caption": 1.771, "total_loss": 9.775}, "20748": {"loss_ce": 0.247, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.004, "cardinality_error": 7.857, "loss_ce_0": 0.247, 
"loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.857, "loss_caption_0": 1.786, "loss_caption": 1.773, "total_loss": 9.695}, "20881": {"loss_ce": 0.243, "loss_counter": 0.103, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.003, "cardinality_error": 7.594, "loss_ce_0": 0.242, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.594, "loss_caption_0": 1.746, "loss_caption": 1.748, "total_loss": 9.541}, "21014": {"loss_ce": 0.249, "loss_counter": 0.108, "loss_bbox": 0.015, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 8.09, "loss_ce_0": 0.249, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.09, "loss_caption_0": 1.709, "loss_caption": 1.698, "total_loss": 9.49}, "21147": {"loss_ce": 0.246, "loss_counter": 0.115, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.812, "loss_ce_0": 0.248, "loss_counter_0": 0.114, "loss_bbox_0": 0.015, "loss_giou_0": 0.198, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.812, "loss_caption_0": 1.733, "loss_caption": 1.732, "total_loss": 9.57}, "21280": {"loss_ce": 0.246, "loss_counter": 0.104, "loss_bbox": 0.014, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 7.632, "loss_ce_0": 0.245, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.632, "loss_caption_0": 1.646, "loss_caption": 1.658, "total_loss": 9.233}, "21413": {"loss_ce": 0.24, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.541, "loss_ce_0": 0.239, "loss_counter_0": 0.108, "loss_bbox_0": 0.015, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.541, "loss_caption_0": 1.637, "loss_caption": 1.633, "total_loss": 9.069}, "21546": {"loss_ce": 0.245, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.172, "loss_self_iou": 0.004, "cardinality_error": 7.624, "loss_ce_0": 0.243, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.185, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.624, "loss_caption_0": 1.773, "loss_caption": 1.784, "total_loss": 9.621}, "21679": {"loss_ce": 0.239, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.238, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.809, "loss_caption": 1.805, "total_loss": 9.791}, "21812": {"loss_ce": 0.246, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.003, "cardinality_error": 7.677, "loss_ce_0": 0.25, "loss_counter_0": 0.108, "loss_bbox_0": 0.013, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.677, "loss_caption_0": 1.674, "loss_caption": 1.676, "total_loss": 9.277}, "21945": {"loss_ce": 0.244, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.192, "loss_self_iou": 0.004, "cardinality_error": 7.865, "loss_ce_0": 0.244, "loss_counter_0": 0.108, "loss_bbox_0": 0.015, "loss_giou_0": 0.206, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.865, "loss_caption_0": 1.713, "loss_caption": 1.714, "total_loss": 9.531}, "22078": {"loss_ce": 0.251, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 7.707, 
"loss_ce_0": 0.247, "loss_counter_0": 0.111, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.707, "loss_caption_0": 1.772, "loss_caption": 1.758, "total_loss": 9.738}, "22211": {"loss_ce": 0.249, "loss_counter": 0.101, "loss_bbox": 0.013, "loss_giou": 0.18, "loss_self_iou": 0.005, "cardinality_error": 7.541, "loss_ce_0": 0.249, "loss_counter_0": 0.101, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.541, "loss_caption_0": 1.665, "loss_caption": 1.66, "total_loss": 9.243}, "22344": {"loss_ce": 0.246, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 8.008, "loss_ce_0": 0.248, "loss_counter_0": 0.115, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.008, "loss_caption_0": 1.799, "loss_caption": 1.784, "total_loss": 9.823}, "22477": {"loss_ce": 0.246, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.184, "loss_self_iou": 0.004, "cardinality_error": 7.699, "loss_ce_0": 0.247, "loss_counter_0": 0.104, "loss_bbox_0": 0.014, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.699, "loss_caption_0": 1.722, "loss_caption": 1.733, "total_loss": 9.525}, "22610": {"loss_ce": 0.243, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.188, "loss_self_iou": 0.004, "cardinality_error": 7.729, "loss_ce_0": 0.245, "loss_counter_0": 0.106, "loss_bbox_0": 0.015, "loss_giou_0": 0.2, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.729, "loss_caption_0": 1.664, "loss_caption": 1.667, "total_loss": 9.297}, "22743": {"loss_ce": 0.244, "loss_counter": 0.108, "loss_bbox": 0.015, "loss_giou": 0.196, "loss_self_iou": 0.005, "cardinality_error": 7.714, "loss_ce_0": 0.244, "loss_counter_0": 0.109, "loss_bbox_0": 0.017, "loss_giou_0": 0.21, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.714, "loss_caption_0": 1.773, "loss_caption": 1.775, "total_loss": 9.803}, "22876": {"loss_ce": 0.245, "loss_counter": 0.11, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.774, "loss_ce_0": 0.249, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.774, "loss_caption_0": 1.76, "loss_caption": 1.759, "total_loss": 9.631}, "23009": {"loss_ce": 0.237, "loss_counter": 0.105, "loss_bbox": 0.012, "loss_giou": 0.171, "loss_self_iou": 0.003, "cardinality_error": 7.872, "loss_ce_0": 0.237, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.183, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.872, "loss_caption_0": 1.69, "loss_caption": 1.688, "total_loss": 9.229}, "23142": {"loss_ce": 0.242, "loss_counter": 0.098, "loss_bbox": 0.013, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.744, "loss_ce_0": 0.239, "loss_counter_0": 0.1, "loss_bbox_0": 0.014, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.744, "loss_caption_0": 1.66, "loss_caption": 1.663, "total_loss": 9.173}, "23275": {"loss_ce": 0.242, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.183, "loss_self_iou": 0.004, "cardinality_error": 7.82, "loss_ce_0": 0.242, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.82, "loss_caption_0": 1.727, "loss_caption": 1.741, "total_loss": 9.535}, "23408": {"loss_ce": 0.235, "loss_counter": 0.104, "loss_bbox": 0.014, "loss_giou": 0.173, "loss_self_iou": 0.004, 
"cardinality_error": 7.083, "loss_ce_0": 0.235, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.182, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.083, "loss_caption_0": 1.678, "loss_caption": 1.68, "total_loss": 9.181}, "23541": {"loss_ce": 0.25, "loss_counter": 0.112, "loss_bbox": 0.013, "loss_giou": 0.185, "loss_self_iou": 0.003, "cardinality_error": 7.782, "loss_ce_0": 0.253, "loss_counter_0": 0.111, "loss_bbox_0": 0.014, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.782, "loss_caption_0": 1.686, "loss_caption": 1.674, "total_loss": 9.361}, "23674": {"loss_ce": 0.242, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.699, "loss_ce_0": 0.242, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.188, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.699, "loss_caption_0": 1.734, "loss_caption": 1.755, "total_loss": 9.502}, "23807": {"loss_ce": 0.247, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.188, "loss_self_iou": 0.004, "cardinality_error": 8.023, "loss_ce_0": 0.248, "loss_counter_0": 0.111, "loss_bbox_0": 0.014, "loss_giou_0": 0.199, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.023, "loss_caption_0": 1.838, "loss_caption": 1.842, "total_loss": 10.01}, "23940": {"loss_ce": 0.242, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.004, "cardinality_error": 7.789, "loss_ce_0": 0.246, "loss_counter_0": 0.11, "loss_bbox_0": 0.014, "loss_giou_0": 0.189, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.789, "loss_caption_0": 1.661, "loss_caption": 1.655, "total_loss": 9.188}, "24073": {"loss_ce": 0.244, "loss_counter": 0.11, "loss_bbox": 0.012, "loss_giou": 0.178, "loss_self_iou": 0.003, "cardinality_error": 7.97, "loss_ce_0": 0.246, "loss_counter_0": 0.112, "loss_bbox_0": 0.013, "loss_giou_0": 0.191, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.97, "loss_caption_0": 1.689, "loss_caption": 1.683, "total_loss": 9.309}, "24206": {"loss_ce": 0.237, "loss_counter": 0.118, "loss_bbox": 0.013, "loss_giou": 0.183, "loss_self_iou": 0.005, "cardinality_error": 8.286, "loss_ce_0": 0.236, "loss_counter_0": 0.118, "loss_bbox_0": 0.013, "loss_giou_0": 0.195, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.286, "loss_caption_0": 1.712, "loss_caption": 1.715, "total_loss": 9.432}, "24339": {"loss_ce": 0.245, "loss_counter": 0.098, "loss_bbox": 0.012, "loss_giou": 0.167, "loss_self_iou": 0.003, "cardinality_error": 7.316, "loss_ce_0": 0.247, "loss_counter_0": 0.099, "loss_bbox_0": 0.013, "loss_giou_0": 0.179, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.316, "loss_caption_0": 1.695, "loss_caption": 1.701, "total_loss": 9.257}, "24472": {"loss_ce": 0.243, "loss_counter": 0.108, "loss_bbox": 0.013, "loss_giou": 0.176, "loss_self_iou": 0.003, "cardinality_error": 7.459, "loss_ce_0": 0.248, "loss_counter_0": 0.109, "loss_bbox_0": 0.014, "loss_giou_0": 0.187, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.459, "loss_caption_0": 1.699, "loss_caption": 1.699, "total_loss": 9.337}, "24605": {"loss_ce": 0.242, "loss_counter": 0.103, "loss_bbox": 0.014, "loss_giou": 0.18, "loss_self_iou": 0.004, "cardinality_error": 7.812, "loss_ce_0": 0.243, "loss_counter_0": 0.104, "loss_bbox_0": 0.015, "loss_giou_0": 0.189, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.812, "loss_caption_0": 1.775, "loss_caption": 1.773, "total_loss": 9.644}, "24738": {"loss_ce": 0.243, "loss_counter": 0.101, "loss_bbox": 0.016, "loss_giou": 0.187, 
"loss_self_iou": 0.004, "cardinality_error": 7.556, "loss_ce_0": 0.246, "loss_counter_0": 0.103, "loss_bbox_0": 0.016, "loss_giou_0": 0.196, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.556, "loss_caption_0": 1.727, "loss_caption": 1.73, "total_loss": 9.525}, "24871": {"loss_ce": 0.239, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.692, "loss_ce_0": 0.241, "loss_counter_0": 0.105, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.77, "loss_caption": 1.773, "total_loss": 9.641}, "25004": {"loss_ce": 0.246, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 8.143, "loss_ce_0": 0.247, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.143, "loss_caption_0": 1.692, "loss_caption": 1.684, "total_loss": 9.379}, "25137": {"loss_ce": 0.245, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.179, "loss_self_iou": 0.004, "cardinality_error": 7.88, "loss_ce_0": 0.245, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.88, "loss_caption_0": 1.691, "loss_caption": 1.696, "total_loss": 9.347}, "25270": {"loss_ce": 0.237, "loss_counter": 0.103, "loss_bbox": 0.014, "loss_giou": 0.185, "loss_self_iou": 0.004, "cardinality_error": 7.767, "loss_ce_0": 0.238, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.767, "loss_caption_0": 1.687, "loss_caption": 1.694, "total_loss": 9.34}, "25403": {"loss_ce": 0.247, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.176, "loss_self_iou": 0.005, "cardinality_error": 7.429, "loss_ce_0": 0.248, "loss_counter_0": 0.105, "loss_bbox_0": 0.014, "loss_giou_0": 0.186, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.429, "loss_caption_0": 1.705, "loss_caption": 1.695, "total_loss": 9.343}, "25536": {"loss_ce": 0.241, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.189, "loss_self_iou": 0.003, "cardinality_error": 7.887, "loss_ce_0": 0.246, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.887, "loss_caption_0": 1.717, "loss_caption": 1.729, "total_loss": 9.517}, "25669": {"loss_ce": 0.239, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.707, "loss_ce_0": 0.243, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.186, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.707, "loss_caption_0": 1.718, "loss_caption": 1.711, "total_loss": 9.385}, "25802": {"loss_ce": 0.24, "loss_counter": 0.111, "loss_bbox": 0.013, "loss_giou": 0.183, "loss_self_iou": 0.004, "cardinality_error": 8.173, "loss_ce_0": 0.242, "loss_counter_0": 0.113, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.173, "loss_caption_0": 1.732, "loss_caption": 1.735, "total_loss": 9.515}, "25935": {"loss_ce": 0.241, "loss_counter": 0.105, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.005, "cardinality_error": 7.82, "loss_ce_0": 0.241, "loss_counter_0": 0.107, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.82, "loss_caption_0": 1.626, "loss_caption": 1.628, "total_loss": 9.063}, "26068": {"loss_ce": 0.24, "loss_counter": 0.102, "loss_bbox": 0.014, 
"loss_giou": 0.182, "loss_self_iou": 0.005, "cardinality_error": 7.444, "loss_ce_0": 0.243, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.19, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.444, "loss_caption_0": 1.697, "loss_caption": 1.701, "total_loss": 9.35}, "26201": {"loss_ce": 0.239, "loss_counter": 0.097, "loss_bbox": 0.014, "loss_giou": 0.168, "loss_self_iou": 0.005, "cardinality_error": 7.301, "loss_ce_0": 0.237, "loss_counter_0": 0.099, "loss_bbox_0": 0.015, "loss_giou_0": 0.181, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.301, "loss_caption_0": 1.702, "loss_caption": 1.703, "total_loss": 9.254}, "26334": {"loss_ce": 0.238, "loss_counter": 0.112, "loss_bbox": 0.013, "loss_giou": 0.174, "loss_self_iou": 0.003, "cardinality_error": 7.827, "loss_ce_0": 0.242, "loss_counter_0": 0.112, "loss_bbox_0": 0.014, "loss_giou_0": 0.188, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.827, "loss_caption_0": 1.729, "loss_caption": 1.725, "total_loss": 9.424}, "26467": {"loss_ce": 0.247, "loss_counter": 0.109, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.003, "cardinality_error": 8.023, "loss_ce_0": 0.245, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.195, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.023, "loss_caption_0": 1.751, "loss_caption": 1.746, "total_loss": 9.586}, "26600": {"loss_ce": 0.242, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.902, "loss_ce_0": 0.242, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.902, "loss_caption_0": 1.727, "loss_caption": 1.737, "total_loss": 9.533}}, "lr_history": {"133": 5e-05, "266": 5e-05, "399": 5e-05, "532": 5e-05, "665": 5e-05, "798": 5e-05, "931": 5e-05, "1064": 5e-05, "1197": 5e-05, "1330": 5e-05, "1463": 5e-05, "1596": 5e-05, "1729": 5e-05, "1862": 5e-05, "1995": 5e-05, "2128": 5e-05, "2261": 5e-05, "2394": 5e-05, "2527": 5e-05, "2660": 5e-05, "2793": 5e-05, "2926": 5e-05, "3059": 5e-05, "3192": 5e-05, "3325": 5e-05, "3458": 5e-05, "3591": 5e-05, "3724": 5e-05, "3857": 5e-05, "3990": 5e-05, "4123": 5e-05, "4256": 5e-05, "4389": 5e-05, "4522": 5e-05, "4655": 5e-05, "4788": 5e-05, "4921": 5e-05, "5054": 5e-05, "5187": 5e-05, "5320": 5e-05, "5453": 5e-05, "5586": 5e-05, "5719": 5e-05, "5852": 5e-05, "5985": 5e-05, "6118": 5e-05, "6251": 5e-05, "6384": 5e-05, "6517": 5e-05, "6650": 5e-05, "6783": 5e-05, "6916": 5e-05, "7049": 5e-05, "7182": 5e-05, "7315": 5e-05, "7448": 5e-05, "7581": 5e-05, "7714": 5e-05, "7847": 5e-05, "7980": 5e-05, "8113": 5e-05, "8246": 5e-05, "8379": 5e-05, "8512": 5e-05, "8645": 5e-05, "8778": 5e-05, "8911": 5e-05, "9044": 5e-05, "9177": 5e-05, "9310": 5e-05, "9443": 5e-05, "9576": 5e-05, "9709": 5e-05, "9842": 5e-05, "9975": 5e-05, "10108": 5e-05, "10241": 5e-05, "10374": 5e-05, "10507": 5e-05, "10640": 5e-05, "10773": 2.5e-05, "10906": 2.5e-05, "11039": 2.5e-05, "11172": 2.5e-05, "11305": 2.5e-05, "11438": 2.5e-05, "11571": 2.5e-05, "11704": 2.5e-05, "11837": 2.5e-05, "11970": 2.5e-05, "12103": 2.5e-05, "12236": 2.5e-05, "12369": 2.5e-05, "12502": 2.5e-05, "12635": 2.5e-05, "12768": 2.5e-05, "12901": 2.5e-05, "13034": 2.5e-05, "13167": 2.5e-05, "13300": 2.5e-05, "13433": 2.5e-05, "13566": 2.5e-05, "13699": 2.5e-05, "13832": 2.5e-05, "13965": 2.5e-05, "14098": 2.5e-05, "14231": 2.5e-05, "14364": 2.5e-05, "14497": 2.5e-05, "14630": 2.5e-05, "14763": 1.25e-05, "14896": 1.25e-05, "15029": 1.25e-05, "15162": 1.25e-05, "15295": 
1.25e-05, "15428": 1.25e-05, "15561": 1.25e-05, "15694": 1.25e-05, "15827": 1.25e-05, "15960": 1.25e-05, "16093": 1.25e-05, "16226": 1.25e-05, "16359": 1.25e-05, "16492": 1.25e-05, "16625": 1.25e-05, "16758": 1.25e-05, "16891": 1.25e-05, "17024": 1.25e-05, "17157": 1.25e-05, "17290": 1.25e-05, "17423": 1.25e-05, "17556": 1.25e-05, "17689": 1.25e-05, "17822": 1.25e-05, "17955": 1.25e-05, "18088": 1.25e-05, "18221": 1.25e-05, "18354": 1.25e-05, "18487": 1.25e-05, "18620": 1.25e-05, "18753": 6.25e-06, "18886": 6.25e-06, "19019": 6.25e-06, "19152": 6.25e-06, "19285": 6.25e-06, "19418": 6.25e-06, "19551": 6.25e-06, "19684": 6.25e-06, "19817": 6.25e-06, "19950": 6.25e-06, "20083": 6.25e-06, "20216": 6.25e-06, "20349": 6.25e-06, "20482": 6.25e-06, "20615": 6.25e-06, "20748": 6.25e-06, "20881": 6.25e-06, "21014": 6.25e-06, "21147": 6.25e-06, "21280": 6.25e-06, "21413": 6.25e-06, "21546": 6.25e-06, "21679": 6.25e-06, "21812": 6.25e-06, "21945": 6.25e-06, "22078": 6.25e-06, "22211": 6.25e-06, "22344": 6.25e-06, "22477": 6.25e-06, "22610": 6.25e-06, "22743": 3.125e-06, "22876": 3.125e-06, "23009": 3.125e-06, "23142": 3.125e-06, "23275": 3.125e-06, "23408": 3.125e-06, "23541": 3.125e-06, "23674": 3.125e-06, "23807": 3.125e-06, "23940": 3.125e-06, "24073": 3.125e-06, "24206": 3.125e-06, "24339": 3.125e-06, "24472": 3.125e-06, "24605": 3.125e-06, "24738": 3.125e-06, "24871": 3.125e-06, "25004": 3.125e-06, "25137": 3.125e-06, "25270": 3.125e-06, "25403": 3.125e-06, "25536": 3.125e-06, "25669": 3.125e-06, "25802": 3.125e-06, "25935": 3.125e-06, "26068": 3.125e-06, "26201": 3.125e-06, "26334": 3.125e-06, "26467": 3.125e-06, "26600": 3.125e-06}}, "eval_history": {}} \ No newline at end of file diff --git a/yc2_univl/model-best.pth b/yc2_univl/model-best.pth new file mode 100644 index 0000000000000000000000000000000000000000..42c65282e76faa81b01540eeb0c653178e2e7f49 --- /dev/null +++ b/yc2_univl/model-best.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5649639a5e2d91c0e6430f7ef3a969419b6a920dabf8cac4617479f64d1d76 +size 377084545 diff --git a/yc2_univl/model-last.pth b/yc2_univl/model-last.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd48389557fb5978c075e01fe7bcfa5562d45dd2 --- /dev/null +++ b/yc2_univl/model-last.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417eb7f05c35061d38ba56c6fab6b264b7e8c9c6d02cbf125c5586c1087696be +size 377084545 diff --git a/yc2_univl/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 b/yc2_univl/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 new file mode 100644 index 0000000000000000000000000000000000000000..d19b25d3fe05018fbced327351295a36da6e2a4d --- /dev/null +++ b/yc2_univl/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cdd59bc89bebf7407e106fa3a4f04453f379c084fc388a64117a235f79c9746 +size 179068 diff --git a/yc2_univl/train.log b/yc2_univl/train.log new file mode 100644 index 0000000000000000000000000000000000000000..bb2d6d2ae6890d5bf94f03e99b8267cbac0b8177 --- /dev/null +++ b/yc2_univl/train.log @@ -0,0 +1,1516 @@ +backup environment completed !
+Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal + + +******************** All args: ************************************************* +align_contiguous = False +align_drop_z = 0 +align_keep_percentile = 0.1 +align_many_to_one = False +align_one_to_many = False +align_top_band_size = 0 +att_hid_size = 512 +aux_loss = True +backbone = None +base_cfg_path = cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml +basic_ss_prob = 0 +batch_size = 1 +batch_size_for_eval = 1 +bbox_loss_coef = 0 +beta = 1 +cap_dec_n_points = 4 +cap_nheads = 1 +cap_num_feature_levels = 4 +cap_prob_clip = False +caption_cost_type = loss +caption_decoder_type = standard +caption_loss_coef = 2 +cfg_path = cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +cl_schedule_time = [0, 2] +cl_schedule_val = [0, 0.1] +clip_context_dim = 512 +cls_loss_coef = 2 +contrastive_hidden_size = 128 +contrastive_loss_start_coef = 0.0 +contrastive_loss_temperature = 0.1 +cost_alpha = 0.25 +cost_gamma = 2 +count_loss_coef = 0.5 +criteria_for_best_ckpt = overall +current_lr = 5e-05 +data_norm = 0 +data_rescale = 1 +debug = False +dec_layers = 2 +dec_n_points = 4 +device = cuda +dict_file = data/howto/vocabulary_howto_rate2_yc2.json +dict_file_val = data/howto/vocabulary_howto_rate2_yc2.json +dilation = False +disable_contrastive_projection = 1 +disable_cudnn = 0 +disable_mid_caption_heads = False +disable_rematch = False +disable_tqdm = False +drop_prob = 0.5 +ec_alpha = 1.0 +enable_bg_for_cl = True +enable_contrastive = False +enable_cross_video_cl = True +enable_e2t_cl = True +enc_layers = 2 +enc_n_points = 4 +eos_coef = 0.1 +epoch = 20 +event_context_dim = None +feature_dim = 768 +feature_sample_rate = 1 +fix_xcw = 1 +focal_alpha = 0.25 +focal_gamma = 2.0 +focal_mil = False +frame_embedding_num = 200 +ft_gt_percent = 1.0 +giou_loss_coef = 4 +gpu_id = [] +grad_clip = 100.0 +gt_file_for_auc = data/anet/captiondata/val_all.json +gt_file_for_eval = ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval = ['data/yc2/captiondata/para/para_yc2_val.json'] +gt_proposal_sample_num = 20 +hidden_dim = 512 +hidden_dropout_prob = 0.5 +huggingface_cache_dir = .cache +id = seq2-ft(mix)-gt_percent-1.0 +id_ori = +input_encoding_size = 512 +invalid_video_json = [] +iteration = 3 +layer_norm_eps = 1e-12 +learning_rate_decay_every = 3 +learning_rate_decay_rate = 0.5 +learning_rate_decay_start = 8 +lloss_beta = 1 +lloss_cross_entropy = 0 +lloss_focal_loss = 0 +lloss_gau_mask = 1 +lr = 5e-05 +lr_backbone = 2e-05 +lr_backbone_names = ['None'] +lr_linear_proj_mult = 0.1 +lr_linear_proj_names = ['reference_points', 'sampling_offsets'] +lr_proj = 0 +map = True +matcher_type = default +max_caption_len = 50 +max_eseq_length = 20 +max_pos_num = 500 +max_text_input_len = 32 +merge_criterion = ins_cap_topk +merge_k_boxes = 3 +merge_mode = weighted_sum +mil_loss_coef = 0 +min_epoch_when_save = -1 +nheads = 8 +norm_ins_score = sigmoid +nthreads = 4 +num_classes = 1 +num_feature_levels = 4 +num_layers = 1 +num_neg_box = 10 +num_queries = 100 +optimizer_type = adam +position_embedding = sine +position_embedding_scale = 6.283185307179586 +pre_percent = 1.0 +pretrain = None +pretrain_path = +pretrained_language_model = UniVL +prior_anchor_duration_init = True +prior_manner = all +pseudo_box_aug = False +pseudo_box_aug_mode = random_range 
+pseudo_box_aug_num = 8 +pseudo_box_aug_ratio = 0.02 +pseudo_box_type = similarity_op_order_v2 +random_anchor_init = True +random_seed = False +ref_rank_loss_coef = 0.0 +refine_pseudo_box = False +refine_pseudo_stage_num = 2 +rnn_size = 512 +sample_method = nearest +save_all_checkpoint = 0 +save_checkpoint_every = 1 +save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs +scheduled_sampling_increase_every = 2 +scheduled_sampling_increase_prob = 0.05 +scheduled_sampling_max_prob = 0.25 +scheduled_sampling_start = -1 +seed = 777 +self_iou_loss_coef = 0.0 +set_cost_bbox = 0 +set_cost_caption = 0 +set_cost_cl = 0.0 +set_cost_class = 2 +set_cost_giou = 4 +set_cost_sim = 1.0 +share_caption_head = 1 +soft_attention = 1 +start_from = +start_from_mode = last +start_refine_epoch = -1 +statistic_mode = mode +test = False +text_encoder_learning_strategy = frozen +text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] +text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] +text_hidden_dim = 768 +top_frames = 25 +train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] +train_proposal_sample_num = 30 +train_proposal_type = gt +training_scheme = all +transformer_dropout_prob = 0.1 +transformer_ff_dim = 512 +transformer_input_type = queries +use_additional_cap_layer = False +use_additional_score_layer = False +use_anchor = 0 +use_neg_pseudo_box = False +use_pseudo_box = False +use_query_box_for_refine = 0 +val_caption_file = data/yc2/captiondata/yc2_val.json +visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] +visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] +visual_feature_type = ['UniVL'] +vocab_size = 14538 +vocab_size_val = 14538 +weight_decay = 0.0001 +weighted_mil_loss = False +width_ratio = 1 +width_th = 1 +window_size = 3 +with_box_refine = 1 +wordRNN_input_feats_type = C + + +******************** Model structure: ****************************************** +PDVC( + (base_encoder): BaseEncoder( + (pos_embed): PositionEmbeddingSine( + (duration_embed_layer): Linear(in_features=256, out_features=256, bias=True) + ) + (input_proj): ModuleList( + (0): Sequential( + (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (1): Sequential( + (0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (2): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + (3): Sequential( + (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) + (1): GroupNorm(32, 512, eps=1e-05, affine=True) + ) + ) + ) + (transformer): DeformableTransformer( + (encoder): DeformableTransformerEncoder( + (layers): ModuleList( + (0): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): 
Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerEncoderLayer( + (self_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout2): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (decoder): DeformableTransformerDecoder( + (layers): ModuleList( + (0): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + (1): DeformableTransformerDecoderLayer( + (cross_attn): MSDeformAttn( + (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) + (attention_weights): Linear(in_features=512, out_features=128, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (dropout1): Dropout(p=0.1, inplace=False) + (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (self_attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (dropout2): Dropout(p=0.1, inplace=False) + (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (linear1): Linear(in_features=512, out_features=512, bias=True) + (dropout3): Dropout(p=0.1, inplace=False) + (linear2): Linear(in_features=512, out_features=512, bias=True) + (dropout4): Dropout(p=0.1, inplace=False) + (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + ) + ) + (bbox_head): ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): 
Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + ) + (pos_trans): Linear(in_features=512, out_features=1024, bias=True) + (pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (reference_points): Linear(in_features=512, out_features=1, bias=True) + ) + (caption_head): ModuleList( + (0): LSTMDSACaptioner( + (embed): Embedding(14539, 512) + (logit): Linear(in_features=512, out_features=14539, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + (1): LSTMDSACaptioner( + (embed): Embedding(14539, 512) + (logit): Linear(in_features=512, out_features=14539, bias=True) + (dropout): Dropout(p=0.5, inplace=False) + (core): ShowAttendTellCore( + (rnn): LSTM(1536, 512, bias=False, dropout=0.5) + (att_drop): Dropout(p=0.5, inplace=False) + (deformable_att): MSDeformAttnCap( + (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) + (attention_weights): Linear(in_features=1024, out_features=16, bias=True) + (value_proj): Linear(in_features=512, out_features=512, bias=True) + (output_proj): Linear(in_features=512, out_features=512, bias=True) + ) + (ctx2att): Linear(in_features=512, out_features=512, bias=True) + (h2att): Linear(in_features=512, out_features=512, bias=True) + (alpha_net): Linear(in_features=512, out_features=1, bias=True) + ) + ) + ) + (query_embed): Embedding(100, 1024) + (class_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (class_refine_head): ModuleList( + (0): Linear(in_features=512, out_features=1, bias=True) + (1): Linear(in_features=512, out_features=1, bias=True) + ) + (count_head): ModuleList( + (0): Linear(in_features=512, out_features=21, bias=True) + (1): Linear(in_features=512, out_features=21, bias=True) + ) + (bbox_head): ModuleList( + (0): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + (1): MLP( + (layers): ModuleList( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): Linear(in_features=512, out_features=512, bias=True) + (2): Linear(in_features=512, out_features=2, bias=True) + ) + ) + ) + (contrastive_projection_event): ModuleList( + (0): Identity() + (1): Identity() + ) + (contrastive_projection_text): ModuleList( + (0): Identity() + (1): Identity() + ) +) + + +******************** Start training !
****************************************** +loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) +loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) +ID seq2-ft(mix)-gt_percent-1.0 iter 133 (epoch 0), +loss = OrderedDict([('loss_ce', 0.336), ('loss_counter', 0.129), ('loss_bbox', 0.039), ('loss_giou', 0.368), ('loss_self_iou', 0.028), ('cardinality_error', 7.797), ('loss_ce_0', 0.337), ('loss_counter_0', 0.13), ('loss_bbox_0', 0.041), ('loss_giou_0', 0.381), ('loss_self_iou_0', 0.03), ('cardinality_error_0', 7.797), ('loss_caption_0', 2.755), ('loss_caption', 2.681), ('total_loss', 15.341)]), +time/iter = 0.172, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 266 (epoch 0), +loss = OrderedDict([('loss_ce', 0.324), ('loss_counter', 0.129), ('loss_bbox', 0.036), ('loss_giou', 0.369), ('loss_self_iou', 0.018), ('cardinality_error', 7.812), ('loss_ce_0', 0.341), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.039), ('loss_giou_0', 0.38), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.812), ('loss_caption_0', 2.803), ('loss_caption', 2.638), ('total_loss', 15.341)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 399 (epoch 0), +loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.13), ('loss_bbox', 0.039), ('loss_giou', 0.375), ('loss_self_iou', 0.02), ('cardinality_error', 7.835), ('loss_ce_0', 0.324), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.043), ('loss_giou_0', 0.395), ('loss_self_iou_0', 0.021), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.81), ('loss_caption', 2.676), ('total_loss', 15.459)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 532 (epoch 0), +loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.133), ('loss_bbox', 0.044), ('loss_giou', 0.394), ('loss_self_iou', 0.02), ('cardinality_error', 7.902), ('loss_ce_0', 0.319), ('loss_counter_0', 0.133), ('loss_bbox_0', 0.05), ('loss_giou_0', 0.421), ('loss_self_iou_0', 0.026), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.817), ('loss_caption', 2.654), ('total_loss', 15.588)]), +time/iter = 0.167, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 665 (epoch 0), +loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.135), ('loss_bbox', 0.034), ('loss_giou', 0.345), ('loss_self_iou', 0.017), ('cardinality_error', 7.805), ('loss_ce_0', 0.319), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.038), ('loss_giou_0', 0.372), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.805), ('loss_caption_0', 2.758), ('loss_caption', 2.635), ('total_loss', 15.049)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 798 (epoch 0), +loss = OrderedDict([('loss_ce', 0.321), ('loss_counter', 0.125), ('loss_bbox', 0.03), ('loss_giou', 0.319), ('loss_self_iou', 0.015), ('cardinality_error', 7.774), ('loss_ce_0', 0.331), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.032), ('loss_giou_0', 0.344), ('loss_self_iou_0', 0.015), ('cardinality_error_0', 7.774), ('loss_caption_0', 2.66), ('loss_caption', 2.559), ('total_loss', 14.519)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 931 (epoch 0), +loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.122), ('loss_bbox', 0.027), ('loss_giou', 0.306), ('loss_self_iou', 0.011), ('cardinality_error', 7.865), ('loss_ce_0', 0.346), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.029), 
('loss_giou_0', 0.327), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.54), ('loss_caption', 2.468), ('total_loss', 14.017)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1064 (epoch 0), +loss = OrderedDict([('loss_ce', 0.331), ('loss_counter', 0.121), ('loss_bbox', 0.027), ('loss_giou', 0.292), ('loss_self_iou', 0.01), ('cardinality_error', 7.579), ('loss_ce_0', 0.345), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.311), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.579), ('loss_caption_0', 2.639), ('loss_caption', 2.626), ('total_loss', 14.419)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1197 (epoch 0), +loss = OrderedDict([('loss_ce', 0.325), ('loss_counter', 0.118), ('loss_bbox', 0.026), ('loss_giou', 0.296), ('loss_self_iou', 0.011), ('cardinality_error', 7.241), ('loss_ce_0', 0.339), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.317), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.241), ('loss_caption_0', 2.501), ('loss_caption', 2.496), ('total_loss', 13.892)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1330 (epoch 0), +loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.126), ('loss_bbox', 0.026), ('loss_giou', 0.304), ('loss_self_iou', 0.011), ('cardinality_error', 7.94), ('loss_ce_0', 0.334), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.029), ('loss_giou_0', 0.332), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.94), ('loss_caption_0', 2.635), ('loss_caption', 2.619), ('total_loss', 14.504)]), +time/iter = 0.158, bad_vid = 0.000 + +Validation results of iter 1333: +Bleu_1:0.16894357888730638 +Bleu_2:0.09902176620134434 +Bleu_3:0.05312286436412136 +Bleu_4:0.026212861867102137 +METEOR:0.0791142699299577 +ROUGE_L:0.15563765109454591 +CIDEr:0.4087091055845523 +Recall:0.1991554685892762 +Precision:0.40083793546594454 +soda_c:0.05642652494419026 +para_Bleu_1:0.28013834967939705 +para_Bleu_2:0.16393959632782257 +para_Bleu_3:0.09809744775628881 +para_Bleu_4:0.060378126412557326 +para_METEOR:0.1286956339033507 +para_ROUGE_L:0.29903071052996405 +para_CIDEr:0.14675303603221324 + +overall score of iter 1333: 0.3358267963481213 + +Save model at iter 1333 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 1333 to checkpoint file. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 1463 (epoch 1), +loss = OrderedDict([('loss_ce', 0.322), ('loss_counter', 0.128), ('loss_bbox', 0.026), ('loss_giou', 0.301), ('loss_self_iou', 0.011), ('cardinality_error', 7.699), ('loss_ce_0', 0.335), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.316), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.448), ('loss_caption', 2.462), ('total_loss', 13.729)]), +time/iter = 0.660, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1596 (epoch 1), +loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.126), ('loss_bbox', 0.022), ('loss_giou', 0.284), ('loss_self_iou', 0.01), ('cardinality_error', 8.233), ('loss_ce_0', 0.322), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.31), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.348), ('loss_caption', 2.348), ('total_loss', 13.16)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1729 (epoch 1), +loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.124), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 7.632), ('loss_ce_0', 0.32), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.307), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.363), ('loss_caption', 2.353), ('total_loss', 13.14)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1862 (epoch 1), +loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.12), ('loss_bbox', 0.023), ('loss_giou', 0.268), ('loss_self_iou', 0.01), ('cardinality_error', 7.609), ('loss_ce_0', 0.32), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.29), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.609), ('loss_caption_0', 2.439), ('loss_caption', 2.419), ('total_loss', 13.343)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1995 (epoch 1), +loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.281), ('loss_self_iou', 0.009), ('cardinality_error', 7.541), ('loss_ce_0', 0.322), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.309), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.541), ('loss_caption_0', 2.503), ('loss_caption', 2.503), ('total_loss', 13.766)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2128 (epoch 1), +loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.126), ('loss_bbox', 0.024), ('loss_giou', 0.284), ('loss_self_iou', 0.009), ('cardinality_error', 7.789), ('loss_ce_0', 0.324), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.301), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.789), ('loss_caption_0', 2.5), ('loss_caption', 2.493), ('total_loss', 13.73)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2261 (epoch 1), +loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.122), ('loss_bbox', 0.023), ('loss_giou', 0.285), ('loss_self_iou', 0.012), ('cardinality_error', 7.902), ('loss_ce_0', 0.316), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.304), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.425), ('loss_caption', 2.424), ('total_loss', 13.426)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2394 (epoch 1), +loss = OrderedDict([('loss_ce', 0.315), ('loss_counter', 
0.126), ('loss_bbox', 0.025), ('loss_giou', 0.29), ('loss_self_iou', 0.011), ('cardinality_error', 7.534), ('loss_ce_0', 0.323), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.308), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.439), ('loss_caption', 2.435), ('total_loss', 13.54)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2527 (epoch 1), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.125), ('loss_bbox', 0.023), ('loss_giou', 0.276), ('loss_self_iou', 0.009), ('cardinality_error', 7.647), ('loss_ce_0', 0.319), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.647), ('loss_caption_0', 2.454), ('loss_caption', 2.455), ('total_loss', 13.492)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2660 (epoch 1), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.131), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 8.0), ('loss_ce_0', 0.317), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.294), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.0), ('loss_caption_0', 2.464), ('loss_caption', 2.451), ('total_loss', 13.487)]), +time/iter = 0.167, bad_vid = 0.000 + +Validation results of iter 2666: +Bleu_1:0.18247710374533507 +Bleu_2:0.10433126216854799 +Bleu_3:0.05471515540980739 +Bleu_4:0.025315544998990337 +METEOR:0.08392673175891194 +ROUGE_L:0.16810710582244187 +CIDEr:0.48711946137609907 +Recall:0.23104975652842194 +Precision:0.4442690424090867 +soda_c:0.06454827356060923 +para_Bleu_1:0.27953804293947354 +para_Bleu_2:0.1635778619591909 +para_Bleu_3:0.09761782578266559 +para_Bleu_4:0.060085255296605154 +para_METEOR:0.13134445752685775 +para_ROUGE_L:0.3040652157082556 +para_CIDEr:0.15701615141849948 + +overall score of iter 2666: 0.34844586424196233 + +Save model at iter 2666 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 2666 to checkpoint file. 
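
The lr_history in the info.json above steps 5e-05 -> 2.5e-05 -> 1.25e-05 -> 6.25e-06 -> 3.125e-06, with the drops first logged at iters 10773, 14763, 18753 and 22743; at roughly 1333 iterations per epoch those are epochs 8, 11, 14 and 17, matching lr = 5e-05, learning_rate_decay_start = 8, learning_rate_decay_every = 3 and learning_rate_decay_rate = 0.5 in the args. A sketch of the implied step decay, reconstructed from the log rather than copied from the training script:

def lr_at_epoch(epoch, base_lr=5e-05, start=8, every=3, rate=0.5):
    # Constant before `start`, then halved once per `every` epochs.
    if epoch < start:
        return base_lr
    return base_lr * rate ** ((epoch - start) // every + 1)

for epoch in (0, 7, 8, 11, 14, 17):
    print(epoch, lr_at_epoch(epoch))
# 5e-05 up to epoch 7, then 2.5e-05, 1.25e-05, 6.25e-06, 3.125e-06
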
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 2793 (epoch 2), +loss = OrderedDict([('loss_ce', 0.309), ('loss_counter', 0.119), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.01), ('cardinality_error', 7.556), ('loss_ce_0', 0.312), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.556), ('loss_caption_0', 2.27), ('loss_caption', 2.276), ('total_loss', 12.632)]), +time/iter = 0.666, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2926 (epoch 2), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.121), ('loss_bbox', 0.023), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.444), ('loss_ce_0', 0.317), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.287), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.444), ('loss_caption_0', 2.276), ('loss_caption', 2.291), ('total_loss', 12.726)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3059 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.127), ('loss_bbox', 0.02), ('loss_giou', 0.272), ('loss_self_iou', 0.008), ('cardinality_error', 8.135), ('loss_ce_0', 0.302), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.135), ('loss_caption_0', 2.364), ('loss_caption', 2.364), ('total_loss', 13.057)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3192 (epoch 2), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.699), ('loss_ce_0', 0.306), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.286), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.367), ('loss_caption', 2.381), ('total_loss', 13.038)]), +time/iter = 0.177, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3325 (epoch 2), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.123), ('loss_bbox', 0.021), ('loss_giou', 0.274), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.3), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.291), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.323), ('loss_caption', 2.33), ('total_loss', 12.887)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3458 (epoch 2), +loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.277), ('loss_self_iou', 0.01), ('cardinality_error', 7.865), ('loss_ce_0', 0.31), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.295), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.351), ('loss_caption', 2.341), ('total_loss', 13.038)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3591 (epoch 2), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.114), ('loss_bbox', 0.022), ('loss_giou', 0.263), ('loss_self_iou', 0.009), ('cardinality_error', 7.586), ('loss_ce_0', 0.308), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.222), ('loss_caption', 2.223), ('total_loss', 12.425)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3724 (epoch 2), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 
0.123), ('loss_bbox', 0.023), ('loss_giou', 0.265), ('loss_self_iou', 0.009), ('cardinality_error', 7.624), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.624), ('loss_caption_0', 2.38), ('loss_caption', 2.368), ('total_loss', 13.014)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3857 (epoch 2), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.264), ('loss_self_iou', 0.009), ('cardinality_error', 7.489), ('loss_ce_0', 0.312), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.489), ('loss_caption_0', 2.343), ('loss_caption', 2.344), ('total_loss', 12.897)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3990 (epoch 2), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.134), ('loss_bbox', 0.02), ('loss_giou', 0.268), ('loss_self_iou', 0.012), ('cardinality_error', 8.301), ('loss_ce_0', 0.299), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.289), ('loss_self_iou_0', 0.013), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.327), ('loss_caption', 2.346), ('total_loss', 12.9)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 3999: +Bleu_1:0.18812761655735627 +Bleu_2:0.11394688266117041 +Bleu_3:0.06350983100569632 +Bleu_4:0.03295035253718016 +METEOR:0.08673497362280043 +ROUGE_L:0.17099683701262633 +CIDEr:0.534654554166069 +Recall:0.2545535313519452 +Precision:0.4357073390990242 +soda_c:0.06940030844072555 +para_Bleu_1:0.31911536052560924 +para_Bleu_2:0.19074275606485158 +para_Bleu_3:0.11503629156908896 +para_Bleu_4:0.07096292455051724 +para_METEOR:0.14141970569772275 +para_ROUGE_L:0.3133292457236414 +para_CIDEr:0.18756071216976763 + +overall score of iter 3999: 0.3999433424180076 + +Save model at iter 3999 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 3999 to checkpoint file. 
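
The "Validation results of iter N" blocks recur once per epoch with one Metric:value pair per line. A throwaway parser to tabulate them; the regexes assume exactly the format shown in this log, and the function name and path are ours:

import re

def parse_validation_blocks(log_text):
    parts = re.split(r"Validation results of iter (\d+):", log_text)[1:]
    # re.split with a capture group leaves alternating [iter, block, iter, block, ...]
    results = {}
    for it, block in zip(parts[0::2], parts[1::2]):
        # Keep only "Name:value" pairs written without spaces, i.e. the metric lines.
        results[int(it)] = {name: float(val)
                            for name, val in re.findall(r"([A-Za-z_]+\d?):([\d.]+)", block)}
    return results

with open("yc2_univl/train.log") as f:
    history = parse_validation_blocks(f.read())
print(history[1333]["CIDEr"])  # 0.4087... for the first validation round above
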
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 4123 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.129), ('loss_bbox', 0.021), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.925), ('loss_ce_0', 0.307), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 2.272), ('loss_caption', 2.28), ('total_loss', 12.579)]),
+time/iter = 0.678, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4256 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.121), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.632), ('loss_ce_0', 0.31), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.247), ('loss_caption', 2.252), ('total_loss', 12.484)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4389 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.12), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.011), ('cardinality_error', 7.526), ('loss_ce_0', 0.309), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.272), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.526), ('loss_caption_0', 2.194), ('loss_caption', 2.205), ('total_loss', 12.273)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4522 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.115), ('loss_bbox', 0.019), ('loss_giou', 0.248), ('loss_self_iou', 0.007), ('cardinality_error', 7.519), ('loss_ce_0', 0.303), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.335), ('loss_caption', 2.326), ('total_loss', 12.689)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4655 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.263), ('loss_self_iou', 0.008), ('cardinality_error', 7.97), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.97), ('loss_caption_0', 2.254), ('loss_caption', 2.267), ('total_loss', 12.545)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4788 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.253), ('loss_self_iou', 0.008), ('cardinality_error', 7.481), ('loss_ce_0', 0.308), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.481), ('loss_caption_0', 2.208), ('loss_caption', 2.195), ('total_loss', 12.24)]),
+time/iter = 0.151, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 4921 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.262), ('loss_self_iou', 0.01), ('cardinality_error', 7.842), ('loss_ce_0', 0.305), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.284), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.842), ('loss_caption_0', 2.186), ('loss_caption', 2.196), ('total_loss', 12.289)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5054 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.121), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.009), ('cardinality_error', 7.887), ('loss_ce_0', 0.305), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.887), ('loss_caption_0', 2.242), ('loss_caption', 2.239), ('total_loss', 12.422)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5187 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.262), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.305), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.277), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.25), ('loss_caption', 2.246), ('total_loss', 12.483)]),
+time/iter = 0.166, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5320 (epoch 3),
+loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.12), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.729), ('loss_caption_0', 2.287), ('loss_caption', 2.298), ('total_loss', 12.64)]),
+time/iter = 0.161, bad_vid = 0.000
+
+Validation results of iter 5332:
+Bleu_1:0.19536023703614988
+Bleu_2:0.11676341716851109
+Bleu_3:0.06337153157323498
+Bleu_4:0.031788948303475714
+METEOR:0.09287502887069582
+ROUGE_L:0.18168372139225142
+CIDEr:0.5345089450528974
+Recall:0.26186565000159123
+Precision:0.4578470702650138
+soda_c:0.06891495599002981
+para_Bleu_1:0.3645537642333956
+para_Bleu_2:0.21504928179111618
+para_Bleu_3:0.1297486406737134
+para_Bleu_4:0.08010111193897063
+para_METEOR:0.1518569517959942
+para_ROUGE_L:0.3241825281759821
+para_CIDEr:0.22211083978975357
+
+overall score of iter 5332: 0.4540689035247184
+
+Save model at iter 5332 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 5332 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 5453 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.022), ('loss_giou', 0.25), ('loss_self_iou', 0.011), ('cardinality_error', 7.519), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.175), ('loss_caption', 2.176), ('total_loss', 12.088)]),
+time/iter = 0.716, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5586 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.12), ('loss_bbox', 0.018), ('loss_giou', 0.252), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.292), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.274), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 2.16), ('loss_caption', 2.132), ('total_loss', 11.979)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5719 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.13), ('loss_bbox', 0.02), ('loss_giou', 0.255), ('loss_self_iou', 0.008), ('cardinality_error', 8.451), ('loss_ce_0', 0.302), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.273), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.451), ('loss_caption_0', 2.166), ('loss_caption', 2.164), ('total_loss', 12.113)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5852 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.246), ('loss_self_iou', 0.007), ('cardinality_error', 7.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.267), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.122), ('loss_caption', 2.111), ('total_loss', 11.841)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 5985 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.243), ('loss_self_iou', 0.009), ('cardinality_error', 7.474), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.263), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.474), ('loss_caption_0', 2.149), ('loss_caption', 2.14), ('total_loss', 11.926)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6118 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.113), ('loss_bbox', 0.018), ('loss_giou', 0.241), ('loss_self_iou', 0.008), ('cardinality_error', 7.639), ('loss_ce_0', 0.302), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.259), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.639), ('loss_caption_0', 2.235), ('loss_caption', 2.215), ('total_loss', 12.218)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6251 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.125), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 7.857), ('loss_ce_0', 0.301), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.857), ('loss_caption_0', 2.235), ('loss_caption', 2.226), ('total_loss', 12.328)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6384 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.02), ('loss_giou', 0.246), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.301), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.208), ('loss_caption', 2.183), ('total_loss', 12.157)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6517 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.12), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.872), ('loss_ce_0', 0.295), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.872), ('loss_caption_0', 2.135), ('loss_caption', 2.155), ('total_loss', 11.99)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6650 (epoch 4),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.112), ('loss_bbox', 0.021), ('loss_giou', 0.244), ('loss_self_iou', 0.008), ('cardinality_error', 7.398), ('loss_ce_0', 0.297), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.26), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.398), ('loss_caption_0', 2.205), ('loss_caption', 2.202), ('total_loss', 12.127)]),
+time/iter = 0.156, bad_vid = 0.000
+
+Validation results of iter 6665:
+Bleu_1:0.19366491706119263
+Bleu_2:0.1161802397372496
+Bleu_3:0.06381908710297783
+Bleu_4:0.0310996008751752
+METEOR:0.0900086447067842
+ROUGE_L:0.1772625018945245
+CIDEr:0.5329339889166991
+Recall:0.27822837264850414
+Precision:0.4414053002674447
+soda_c:0.0725148309247326
+para_Bleu_1:0.36779729697992286
+para_Bleu_2:0.2189609464261768
+para_Bleu_3:0.13170237886801614
+para_Bleu_4:0.08102932652379062
+para_METEOR:0.15287168689015676
+para_ROUGE_L:0.32609559286330886
+para_CIDEr:0.24981796796266917
+
+overall score of iter 6665: 0.48371898137661656
+
+Save model at iter 6665 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 6665 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 6783 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.117), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.586), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.02), ('loss_caption', 2.014), ('total_loss', 11.332)]),
+time/iter = 0.689, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 6916 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.249), ('loss_self_iou', 0.008), ('cardinality_error', 7.519), ('loss_ce_0', 0.302), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.264), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.118), ('loss_caption', 2.101), ('total_loss', 11.817)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7049 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.25), ('loss_self_iou', 0.007), ('cardinality_error', 7.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.105), ('loss_caption', 2.111), ('total_loss', 11.78)]),
+time/iter = 0.172, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7182 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.242), ('loss_self_iou', 0.008), ('cardinality_error', 7.594), ('loss_ce_0', 0.288), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.594), ('loss_caption_0', 2.194), ('loss_caption', 2.195), ('total_loss', 12.045)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7315 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.123), ('loss_bbox', 0.02), ('loss_giou', 0.254), ('loss_self_iou', 0.009), ('cardinality_error', 8.301), ('loss_ce_0', 0.291), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.096), ('loss_caption', 2.09), ('total_loss', 11.741)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7448 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.234), ('loss_self_iou', 0.006), ('cardinality_error', 7.677), ('loss_ce_0', 0.292), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.251), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.076), ('loss_caption', 2.063), ('total_loss', 11.513)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7581 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.116), ('loss_bbox', 0.019), ('loss_giou', 0.238), ('loss_self_iou', 0.008), ('cardinality_error', 7.534), ('loss_ce_0', 0.295), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.114), ('loss_caption', 2.112), ('total_loss', 11.718)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7714 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.677), ('loss_ce_0', 0.291), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.167), ('loss_caption', 2.179), ('total_loss', 11.932)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7847 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.252), ('loss_self_iou', 0.009), ('cardinality_error', 8.053), ('loss_ce_0', 0.289), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.053), ('loss_caption_0', 2.106), ('loss_caption', 2.115), ('total_loss', 11.804)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 7980 (epoch 5),
+loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.249), ('loss_self_iou', 0.007), ('cardinality_error', 7.902), ('loss_ce_0', 0.295), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.151), ('loss_caption', 2.153), ('total_loss', 11.979)]),
+time/iter = 0.158, bad_vid = 0.000
+
+Validation results of iter 7998:
+Bleu_1:0.19874944106127662
+Bleu_2:0.12266046915797622
+Bleu_3:0.07150852984916518
+Bleu_4:0.036185181004552064
+METEOR:0.09274687098087099
+ROUGE_L:0.18413336093424784
+CIDEr:0.5727051685734265
+Recall:0.259037909270404
+Precision:0.451289465457956
+soda_c:0.07263494732248185
+para_Bleu_1:0.32307562783294125
+para_Bleu_2:0.1944214796418441
+para_Bleu_3:0.11901149393254483
+para_Bleu_4:0.07454555120453704
+para_METEOR:0.14324209261218024
+para_ROUGE_L:0.31918573126228
+para_CIDEr:0.23096832321460165
+
+overall score of iter 7998: 0.4487559670313189
+
+Save model at iter 7998 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 8113 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.236), ('loss_self_iou', 0.008), ('cardinality_error', 7.617), ('loss_ce_0', 0.295), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.036), ('loss_caption', 2.044), ('total_loss', 11.427)]),
+time/iter = 0.677, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8246 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.237), ('loss_self_iou', 0.006), ('cardinality_error', 7.827), ('loss_ce_0', 0.283), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 2.055), ('loss_caption', 2.057), ('total_loss', 11.458)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8379 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.118), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.286), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.046), ('loss_caption', 2.041), ('total_loss', 11.331)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8512 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.228), ('loss_self_iou', 0.006), ('cardinality_error', 7.654), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.245), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.654), ('loss_caption_0', 1.991), ('loss_caption', 1.997), ('total_loss', 11.118)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8645 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 8.068), ('loss_ce_0', 0.287), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.068), ('loss_caption_0', 2.094), ('loss_caption', 2.097), ('total_loss', 11.714)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8778 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.121), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.008), ('cardinality_error', 8.008), ('loss_ce_0', 0.286), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.258), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.008), ('loss_caption_0', 2.092), ('loss_caption', 2.092), ('total_loss', 11.63)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 8911 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.338), ('loss_ce_0', 0.297), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.248), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.338), ('loss_caption_0', 2.051), ('loss_caption', 2.054), ('total_loss', 11.446)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9044 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.105), ('loss_bbox', 0.02), ('loss_giou', 0.227), ('loss_self_iou', 0.008), ('cardinality_error', 7.226), ('loss_ce_0', 0.292), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.226), ('loss_caption_0', 2.08), ('loss_caption', 2.084), ('total_loss', 11.478)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9177 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.254), ('loss_self_iou', 0.007), ('cardinality_error', 7.977), ('loss_ce_0', 0.288), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.977), ('loss_caption_0', 2.046), ('loss_caption', 2.031), ('total_loss', 11.546)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9310 (epoch 6),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.236), ('loss_self_iou', 0.006), ('cardinality_error', 7.97), ('loss_ce_0', 0.281), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.986), ('loss_caption', 1.995), ('total_loss', 11.157)]),
+time/iter = 0.152, bad_vid = 0.000
+
+Validation results of iter 9331:
+Bleu_1:0.2003309018825777
+Bleu_2:0.1225756065112458
+Bleu_3:0.06724461390362559
+Bleu_4:0.033684328156599955
+METEOR:0.0938288297360794
+ROUGE_L:0.1832565856913202
+CIDEr:0.5805494889367487
+Recall:0.28578288505804933
+Precision:0.4570872842207636
+soda_c:0.07457933387713374
+para_Bleu_1:0.3713316702717572
+para_Bleu_2:0.22391267992808692
+para_Bleu_3:0.1360620228892395
+para_Bleu_4:0.08475146307949002
+para_METEOR:0.15553928732702577
+para_ROUGE_L:0.3279787647771023
+para_CIDEr:0.24807495620487915
+
+overall score of iter 9331: 0.4883657066113949
+
+Save model at iter 9331 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 9331 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 9443 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.226), ('loss_self_iou', 0.006), ('cardinality_error', 7.617), ('loss_ce_0', 0.292), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.065), ('loss_caption', 2.061), ('total_loss', 11.394)]),
+time/iter = 0.717, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9576 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.02), ('loss_giou', 0.231), ('loss_self_iou', 0.006), ('cardinality_error', 7.917), ('loss_ce_0', 0.284), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.917), ('loss_caption_0', 1.977), ('loss_caption', 1.974), ('total_loss', 11.093)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9709 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.117), ('loss_bbox', 0.016), ('loss_giou', 0.224), ('loss_self_iou', 0.006), ('cardinality_error', 8.098), ('loss_ce_0', 0.29), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.098), ('loss_caption_0', 2.051), ('loss_caption', 2.063), ('total_loss', 11.373)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9842 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.242), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.286), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.939), ('loss_caption', 1.953), ('total_loss', 11.058)]),
+time/iter = 0.169, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 9975 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.116), ('loss_bbox', 0.017), ('loss_giou', 0.238), ('loss_self_iou', 0.006), ('cardinality_error', 8.233), ('loss_ce_0', 0.281), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.255), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.024), ('loss_caption', 2.026), ('total_loss', 11.31)]),
+time/iter = 0.167, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10108 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.018), ('loss_giou', 0.232), ('loss_self_iou', 0.006), ('cardinality_error', 7.466), ('loss_ce_0', 0.279), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.466), ('loss_caption_0', 1.878), ('loss_caption', 1.882), ('total_loss', 10.667)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10241 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.018), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.722), ('loss_ce_0', 0.282), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.984), ('loss_caption', 1.988), ('total_loss', 11.165)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10374 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.225), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.285), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 2.089), ('loss_caption', 2.094), ('total_loss', 11.498)]),
+time/iter = 0.164, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10507 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.113), ('loss_bbox', 0.019), ('loss_giou', 0.22), ('loss_self_iou', 0.007), ('cardinality_error', 7.564), ('loss_ce_0', 0.283), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.564), ('loss_caption_0', 1.936), ('loss_caption', 1.935), ('total_loss', 10.84)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10640 (epoch 7),
+loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.232), ('loss_self_iou', 0.008), ('cardinality_error', 7.549), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.249), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.549), ('loss_caption_0', 2.041), ('loss_caption', 2.042), ('total_loss', 11.323)]),
+time/iter = 0.178, bad_vid = 0.000
+
+Validation results of iter 10664:
+Bleu_1:0.19584871429233122
+Bleu_2:0.1203954133477019
+Bleu_3:0.06765236989260215
+Bleu_4:0.03515047236439923
+METEOR:0.09347581038898298
+ROUGE_L:0.18336361365161372
+CIDEr:0.5642570328531701
+Recall:0.287053410514844
+Precision:0.4506790316418327
+soda_c:0.07315525040409161
+para_Bleu_1:0.39595219023577966
+para_Bleu_2:0.23717913606151478
+para_Bleu_3:0.14480681642134902
+para_Bleu_4:0.0901695364250172
+para_METEOR:0.16127903027678414
+para_ROUGE_L:0.3324403291093838
+para_CIDEr:0.23804687234043756
+
+overall score of iter 10664: 0.48949543904223886
+
+Save model at iter 10664 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 10664 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 10773 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.114), ('loss_bbox', 0.017), ('loss_giou', 0.235), ('loss_self_iou', 0.006), ('cardinality_error', 7.94), ('loss_ce_0', 0.278), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.851), ('loss_caption', 1.84), ('total_loss', 10.561)]),
+time/iter = 0.724, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 10906 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.109), ('loss_bbox', 0.017), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.218), ('loss_ce_0', 0.278), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.218), ('loss_caption_0', 1.945), ('loss_caption', 1.948), ('total_loss', 10.791)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11039 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.108), ('loss_bbox', 0.017), ('loss_giou', 0.207), ('loss_self_iou', 0.006), ('cardinality_error', 7.579), ('loss_ce_0', 0.283), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.579), ('loss_caption_0', 1.92), ('loss_caption', 1.927), ('total_loss', 10.664)]),
+time/iter = 0.165, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11172 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.451), ('loss_ce_0', 0.279), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.451), ('loss_caption_0', 1.91), ('loss_caption', 1.9), ('total_loss', 10.635)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11305 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.125), ('loss_bbox', 0.017), ('loss_giou', 0.233), ('loss_self_iou', 0.006), ('cardinality_error', 8.09), ('loss_ce_0', 0.276), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.244), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.876), ('loss_caption', 1.877), ('total_loss', 10.648)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11438 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.113), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.269), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.981), ('loss_caption', 1.968), ('total_loss', 10.865)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11571 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.006), ('cardinality_error', 7.699), ('loss_ce_0', 0.277), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.833), ('loss_caption', 1.846), ('total_loss', 10.461)]),
+time/iter = 0.149, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11704 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.21), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.91), ('loss_caption', 1.915), ('total_loss', 10.628)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11837 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.215), ('loss_self_iou', 0.007), ('cardinality_error', 8.0), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.0), ('loss_caption_0', 1.936), ('loss_caption', 1.939), ('total_loss', 10.726)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 11970 (epoch 8),
+loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.22), ('loss_self_iou', 0.006), ('cardinality_error', 8.158), ('loss_ce_0', 0.27), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.158), ('loss_caption_0', 1.953), ('loss_caption', 1.962), ('total_loss', 10.881)]),
+time/iter = 0.168, bad_vid = 0.000
+
+Validation results of iter 11997:
+Bleu_1:0.19696025394358163
+Bleu_2:0.12042554867022627
+Bleu_3:0.06805715701089529
+Bleu_4:0.034063345644385214
+METEOR:0.09208296372249718
+ROUGE_L:0.1803782633150628
+CIDEr:0.5812603125344058
+Recall:0.29169024735901117
+Precision:0.44299129936438486
+soda_c:0.07606608300691252
+para_Bleu_1:0.383549187276652
+para_Bleu_2:0.23192713278728125
+para_Bleu_3:0.14217181061136971
+para_Bleu_4:0.0892715976218228
+para_METEOR:0.16074434603101373
+para_ROUGE_L:0.3336567463040183
+para_CIDEr:0.2859809872200661
+
+overall score of iter 11997: 0.5359969308729027
+
+Save model at iter 11997 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 11997 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 12103 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.216), ('loss_self_iou', 0.006), ('cardinality_error', 8.038), ('loss_ce_0', 0.274), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.038), ('loss_caption_0', 1.832), ('loss_caption', 1.845), ('total_loss', 10.35)]),
+time/iter = 0.705, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12236 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.206), ('loss_self_iou', 0.005), ('cardinality_error', 7.812), ('loss_ce_0', 0.266), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.968), ('loss_caption', 1.959), ('total_loss', 10.757)]),
+time/iter = 0.166, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12369 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.118), ('loss_bbox', 0.016), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.27), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.89), ('loss_caption', 1.903), ('total_loss', 10.534)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12502 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.108), ('loss_bbox', 0.016), ('loss_giou', 0.205), ('loss_self_iou', 0.006), ('cardinality_error', 7.684), ('loss_ce_0', 0.268), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.684), ('loss_caption_0', 1.903), ('loss_caption', 1.905), ('total_loss', 10.519)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12635 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.218), ('loss_self_iou', 0.005), ('cardinality_error', 7.947), ('loss_ce_0', 0.269), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.232), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.947), ('loss_caption_0', 1.822), ('loss_caption', 1.826), ('total_loss', 10.284)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12768 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.111), ('loss_bbox', 0.017), ('loss_giou', 0.219), ('loss_self_iou', 0.008), ('cardinality_error', 7.669), ('loss_ce_0', 0.276), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.905), ('loss_caption', 1.909), ('total_loss', 10.662)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 12901 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.639), ('loss_ce_0', 0.267), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.639), ('loss_caption_0', 1.856), ('loss_caption', 1.863), ('total_loss', 10.344)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13034 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.274), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.841), ('loss_caption', 1.841), ('total_loss', 10.356)]),
+time/iter = 0.149, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13167 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.018), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.406), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.406), ('loss_caption_0', 1.931), ('loss_caption', 1.927), ('total_loss', 10.663)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13300 (epoch 9),
+loss = OrderedDict([('loss_ce', 0.274), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.005), ('cardinality_error', 7.737), ('loss_ce_0', 0.272), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.853), ('loss_caption', 1.849), ('total_loss', 10.379)]),
+time/iter = 0.154, bad_vid = 0.000
+
+Validation results of iter 13330:
+Bleu_1:0.20446290018298774
+Bleu_2:0.12418412895577716
+Bleu_3:0.06899010124646034
+Bleu_4:0.03428116460131532
+METEOR:0.09595521703655657
+ROUGE_L:0.1876517650928566
+CIDEr:0.5887832993219201
+Recall:0.3017153873964599
+Precision:0.4588439095550697
+soda_c:0.07875391677883807
+para_Bleu_1:0.3953706124668704
+para_Bleu_2:0.24043007714841402
+para_Bleu_3:0.14833197751929023
+para_Bleu_4:0.09386644902900565
+para_METEOR:0.16476396966168239
+para_ROUGE_L:0.33760319454244797
+para_CIDEr:0.31194480042956774
+
+overall score of iter 13330: 0.5705752191202558
+
+Save model at iter 13330 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 13330 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 13433 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.112), ('loss_bbox', 0.017), ('loss_giou', 0.217), ('loss_self_iou', 0.006), ('cardinality_error', 7.835), ('loss_ce_0', 0.267), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.835), ('loss_caption_0', 1.804), ('loss_caption', 1.811), ('total_loss', 10.223)]),
+time/iter = 0.700, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13566 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.116), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.005), ('cardinality_error', 7.774), ('loss_ce_0', 0.266), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.221), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.884), ('loss_caption', 1.887), ('total_loss', 10.42)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13699 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.201), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.823), ('loss_caption', 1.806), ('total_loss', 10.083)]),
+time/iter = 0.158, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13832 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.699), ('loss_ce_0', 0.271), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.855), ('loss_caption', 1.857), ('total_loss', 10.374)]),
+time/iter = 0.164, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 13965 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.105), ('loss_bbox', 0.016), ('loss_giou', 0.196), ('loss_self_iou', 0.006), ('cardinality_error', 7.128), ('loss_ce_0', 0.271), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.128), ('loss_caption_0', 1.809), ('loss_caption', 1.8), ('total_loss', 10.055)]),
+time/iter = 0.151, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14098 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.016), ('loss_giou', 0.213), ('loss_self_iou', 0.007), ('cardinality_error', 7.925), ('loss_ce_0', 0.273), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.863), ('loss_caption', 1.863), ('total_loss', 10.433)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14231 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.007), ('cardinality_error', 7.82), ('loss_ce_0', 0.262), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.222), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.936), ('loss_caption', 1.929), ('total_loss', 10.624)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14364 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.263), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.227), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.757), ('loss_caption', 1.754), ('total_loss', 9.948)]),
+time/iter = 0.185, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14497 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.11), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.265), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.896), ('loss_caption', 1.894), ('total_loss', 10.407)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14630 (epoch 10),
+loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.925), ('loss_ce_0', 0.261), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.84), ('loss_caption', 1.842), ('total_loss', 10.253)]),
+time/iter = 0.158, bad_vid = 0.000
+
+Validation results of iter 14663:
+Bleu_1:0.19267153393038786
+Bleu_2:0.11732781330402656
+Bleu_3:0.06746115616325608
+Bleu_4:0.03425583839334337
+METEOR:0.08963300348041837
+ROUGE_L:0.17480207136309905
+CIDEr:0.575137603362526
+Recall:0.30432682743951917
+Precision:0.4353044354138446
+soda_c:0.07762847290423684
+para_Bleu_1:0.393384019586376
+para_Bleu_2:0.23835405770332685
+para_Bleu_3:0.14545808678454117
+para_Bleu_4:0.09085202435904723
+para_METEOR:0.16354570345255123
+para_ROUGE_L:0.3343729651839732
+para_CIDEr:0.27098453497923136
+
+overall score of iter 14663: 0.5253822627908299
+
+Save model at iter 14663 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 14763 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.006), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.225), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.87), ('loss_caption', 1.877), ('total_loss', 10.398)]),
+time/iter = 0.690, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 14896 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.796), ('loss_caption', 1.784), ('total_loss', 9.979)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15029 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.006), ('cardinality_error', 7.414), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.414), ('loss_caption_0', 1.763), ('loss_caption', 1.767), ('total_loss', 9.842)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15162 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.262), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.781), ('loss_caption', 1.781), ('total_loss', 9.916)]),
+time/iter = 0.155, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15295 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.662), ('loss_ce_0', 0.255), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.735), ('loss_caption', 1.75), ('total_loss', 9.755)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15428 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.114), ('loss_bbox', 0.015), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.992), ('loss_ce_0', 0.261), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.852), ('loss_caption', 1.86), ('total_loss', 10.298)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15561 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.006), ('cardinality_error', 8.068), ('loss_ce_0', 0.257), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.068), ('loss_caption_0', 1.878), ('loss_caption', 1.866), ('total_loss', 10.314)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15694 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.202), ('loss_self_iou', 0.004), ('cardinality_error', 7.647), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.647), ('loss_caption_0', 1.7), ('loss_caption', 1.684), ('total_loss', 9.569)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15827 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.104), ('loss_bbox', 0.016), ('loss_giou', 0.194), ('loss_self_iou', 0.005), ('cardinality_error', 7.722), ('loss_ce_0', 0.257), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.848), ('loss_caption', 1.839), ('total_loss', 10.119)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 15960 (epoch 11),
+loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.107), ('loss_bbox', 0.015), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 7.609), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.609), ('loss_caption_0', 1.847), ('loss_caption', 1.858), ('total_loss', 10.198)]),
+time/iter = 0.161, bad_vid = 0.000
+
+Validation results of iter 15996:
+Bleu_1:0.1989422607268001
+Bleu_2:0.12223038556953512
+Bleu_3:0.06835990671747892
+Bleu_4:0.03486159828438583
+METEOR:0.09408978838449876
+ROUGE_L:0.18200142867223945
+CIDEr:0.593480700759431
+Recall:0.30795469953703025
+Precision:0.4513424333993264
+soda_c:0.0796861065455984
+para_Bleu_1:0.39594509057043764
+para_Bleu_2:0.24087109399513515
+para_Bleu_3:0.14790262814870953
+para_Bleu_4:0.09321042711819619
+para_METEOR:0.1655617051143519
+para_ROUGE_L:0.3391051008488012
+para_CIDEr:0.32807196750555834
+
+overall score of iter 15996: 0.5868440997381064
+
+Save model at iter 15996 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save Best-model at iter 15996 to checkpoint file.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 16093 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.258), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.773), ('loss_caption', 1.769), ('total_loss', 9.789)]),
+time/iter = 0.727, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16226 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.004), ('cardinality_error', 7.805), ('loss_ce_0', 0.259), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.805), ('loss_caption_0', 1.743), ('loss_caption', 1.749), ('total_loss', 9.786)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16359 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.116), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.797), ('loss_caption', 1.778), ('total_loss', 9.972)]),
+time/iter = 0.156, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16492 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.105), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.383), ('loss_ce_0', 0.257), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.383), ('loss_caption_0', 1.796), ('loss_caption', 1.808), ('total_loss', 9.899)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16625 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.005), ('cardinality_error', 7.782), ('loss_ce_0', 0.256), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.78), ('loss_caption', 1.779), ('total_loss', 9.812)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16758 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.962), ('loss_ce_0', 0.252), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.962), ('loss_caption_0', 1.795), ('loss_caption', 1.806), ('total_loss', 9.948)]),
+time/iter = 0.153, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 16891 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.016), ('loss_giou', 0.199), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.255), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.788), ('loss_caption', 1.782), ('total_loss', 9.914)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17024 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.511), ('loss_ce_0', 0.26), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.511), ('loss_caption_0', 1.717), ('loss_caption', 1.72), ('total_loss', 9.666)]),
+time/iter = 0.170, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17157 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.252), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.738), ('loss_caption', 1.749), ('total_loss', 9.638)]),
+time/iter = 0.182, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17290 (epoch 12),
+loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.932), ('loss_ce_0', 0.254), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.932), ('loss_caption_0', 1.815), ('loss_caption', 1.83), ('total_loss', 10.067)]),
+time/iter = 0.163, bad_vid = 0.000
+
+Validation results of iter 17329:
+Bleu_1:0.19294534256446427
+Bleu_2:0.11789730285267924
+Bleu_3:0.06601509377472357
+Bleu_4:0.03274421971508606
+METEOR:0.0906445074413136
+ROUGE_L:0.17678145420382357
+CIDEr:0.5750907875125135
+Recall:0.3073352674556176
+Precision:0.4434536834427428
+soda_c:0.07896521325127955
+para_Bleu_1:0.39483511792471604
+para_Bleu_2:0.23988438429479647
+para_Bleu_3:0.1464330354033768
+para_Bleu_4:0.09122283851671699
+para_METEOR:0.16480200992253577
+para_ROUGE_L:0.33317486176302236
+para_CIDEr:0.29080350784714515
+
+overall score of iter 17329: 0.5468283562863979
+
+Save model at iter 17329 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json
+ID seq2-ft(mix)-gt_percent-1.0 iter 17423 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.865), ('loss_caption', 1.881), ('total_loss', 10.261)]),
+time/iter = 0.713, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17556 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.193), ('loss_self_iou', 0.004), ('cardinality_error', 7.737), ('loss_ce_0', 0.253), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.744), ('loss_caption', 1.743), ('total_loss', 9.707)]),
+time/iter = 0.168, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17689 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.199), ('loss_self_iou', 0.006), ('cardinality_error', 7.602), ('loss_ce_0', 0.262), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.602), ('loss_caption_0', 1.835), ('loss_caption', 1.819), ('total_loss', 10.1)]),
+time/iter = 0.160, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17822 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.191), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.249), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.681), ('loss_caption', 1.67), ('total_loss', 9.397)]),
+time/iter = 0.152, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 17955 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.184), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.252), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.757), ('loss_caption', 1.745), ('total_loss', 9.658)]),
+time/iter = 0.159, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18088 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.534), ('loss_ce_0', 0.251), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.534), ('loss_caption_0', 1.703), ('loss_caption', 1.701), ('total_loss', 9.39)]),
+time/iter = 0.154, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18221 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.201), ('loss_self_iou', 0.005), ('cardinality_error', 8.211), ('loss_ce_0', 0.252), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.211), ('loss_caption_0', 1.824), ('loss_caption', 1.816), ('total_loss', 10.053)]),
+time/iter = 0.161, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18354 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.249), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.792), ('loss_caption', 1.779), ('total_loss', 9.874)]),
+time/iter = 0.157, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18487 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.251), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.826), ('loss_caption', 1.81), ('total_loss', 9.979)]),
+time/iter = 0.162, bad_vid = 0.000
+ID seq2-ft(mix)-gt_percent-1.0 iter 18620 (epoch 13),
+loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.003), ('cardinality_error', 7.737), ('loss_ce_0', 0.251), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.767), ('loss_caption', 1.771), ('total_loss', 9.784)]),
+time/iter = 0.153, bad_vid = 0.000
+
+Validation results of iter 18662:
+Bleu_1:0.1916652028982354
+Bleu_2:0.11864819375256218
+Bleu_3:0.06801290454817709
+Bleu_4:0.03421778123301331
+METEOR:0.08890100804282676
+ROUGE_L:0.17229926562968575
+CIDEr:0.5719694906113042
+Recall:0.3115151404333572
+Precision:0.42734448265082836
+soda_c:0.07979305036983636
+para_Bleu_1:0.3972508455506424
+para_Bleu_2:0.24317507500304622
+para_Bleu_3:0.1497047997976745
+para_Bleu_4:0.09437727320664267
+para_METEOR:0.16651343432042678
+para_ROUGE_L:0.33875534436877147
+para_CIDEr:0.29220356232363026
+
+overall score of iter 18662: 0.5530942698506998
+
+Save model at iter 18662 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth.
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 18753 (epoch 14), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 8.241), ('loss_ce_0', 0.251), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.241), ('loss_caption_0', 1.758), ('loss_caption', 1.759), ('total_loss', 9.756)]), +time/iter = 0.731, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18886 (epoch 14), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.182), ('loss_self_iou', 0.004), ('cardinality_error', 7.436), ('loss_ce_0', 0.245), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.436), ('loss_caption_0', 1.696), ('loss_caption', 1.692), ('total_loss', 9.366)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19019 (epoch 14), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 7.692), ('loss_ce_0', 0.242), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.73), ('loss_caption', 1.729), ('total_loss', 9.496)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19152 (epoch 14), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.112), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.251), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.779), ('loss_caption', 1.771), ('total_loss', 9.714)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19285 (epoch 14), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.014), ('loss_giou', 0.194), ('loss_self_iou', 0.004), ('cardinality_error', 7.669), ('loss_ce_0', 0.25), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.76), ('loss_caption', 1.772), ('total_loss', 9.759)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19418 (epoch 14), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 8.256), ('loss_ce_0', 0.245), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.256), ('loss_caption_0', 1.754), ('loss_caption', 1.758), ('total_loss', 9.747)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19551 (epoch 14), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.253), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.68), ('loss_caption', 1.689), ('total_loss', 9.3)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19684 (epoch 14), +loss = OrderedDict([('loss_ce', 0.263), 
('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.005), ('cardinality_error', 7.474), ('loss_ce_0', 0.262), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.474), ('loss_caption_0', 1.81), ('loss_caption', 1.803), ('total_loss', 9.923)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19817 (epoch 14), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.247), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.769), ('loss_caption', 1.765), ('total_loss', 9.677)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19950 (epoch 14), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.253), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.736), ('loss_caption', 1.748), ('total_loss', 9.654)]), +time/iter = 0.153, bad_vid = 0.000 + +Validation results of iter 19995: +Bleu_1:0.19012877786294885 +Bleu_2:0.11743680046097797 +Bleu_3:0.06623934110461578 +Bleu_4:0.03314975306654321 +METEOR:0.08857227272587216 +ROUGE_L:0.17208518718096077 +CIDEr:0.5689998070546577 +Recall:0.3090681299310951 +Precision:0.43095498593310433 +soda_c:0.08081534748318767 +para_Bleu_1:0.3949292262433903 +para_Bleu_2:0.24183495416706074 +para_Bleu_3:0.1493168425692173 +para_Bleu_4:0.0941904023418332 +para_METEOR:0.16661877157717606 +para_ROUGE_L:0.3391544295873436 +para_CIDEr:0.3057631644012313 + +overall score of iter 19995: 0.5665723383202406 + +Save model at iter 19995 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 20083 (epoch 15), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.519), ('loss_ce_0', 0.257), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.519), ('loss_caption_0', 1.743), ('loss_caption', 1.756), ('total_loss', 9.655)]), +time/iter = 0.703, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20216 (epoch 15), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.759), ('loss_ce_0', 0.244), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.759), ('loss_caption_0', 1.79), ('loss_caption', 1.781), ('total_loss', 9.713)]), +time/iter = 0.168, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20349 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.245), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.749), ('loss_caption', 1.759), ('total_loss', 9.675)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20482 (epoch 15), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.005), ('cardinality_error', 7.94), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.694), ('loss_caption', 1.715), ('total_loss', 9.502)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20615 (epoch 15), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.005), ('cardinality_error', 7.368), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.368), ('loss_caption_0', 1.77), ('loss_caption', 1.771), ('total_loss', 9.775)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20748 (epoch 15), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.857), ('loss_ce_0', 0.247), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.857), ('loss_caption_0', 1.786), ('loss_caption', 1.773), ('total_loss', 9.695)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20881 (epoch 15), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.103), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.594), ('loss_ce_0', 0.242), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.594), ('loss_caption_0', 1.746), ('loss_caption', 1.748), ('total_loss', 9.541)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21014 (epoch 15), +loss = OrderedDict([('loss_ce', 0.249), 
('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 8.09), ('loss_ce_0', 0.249), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.709), ('loss_caption', 1.698), ('total_loss', 9.49)]), +time/iter = 0.149, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21147 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.115), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.248), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.733), ('loss_caption', 1.732), ('total_loss', 9.57)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21280 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.632), ('loss_ce_0', 0.245), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.632), ('loss_caption_0', 1.646), ('loss_caption', 1.658), ('total_loss', 9.233)]), +time/iter = 0.152, bad_vid = 0.000 + +Validation results of iter 21328: +Bleu_1:0.1927355202990476 +Bleu_2:0.11755729236198051 +Bleu_3:0.06532950485231373 +Bleu_4:0.0318670348131602 +METEOR:0.08966953019840175 +ROUGE_L:0.17549405824640266 +CIDEr:0.5708533801009449 +Recall:0.31055728552993345 +Precision:0.4412863394810881 +soda_c:0.08079399116249976 +para_Bleu_1:0.3847850395827542 +para_Bleu_2:0.23591168028694995 +para_Bleu_3:0.14500000021146267 +para_Bleu_4:0.09097906463153684 +para_METEOR:0.1633729521776342 +para_ROUGE_L:0.33764324525807 +para_CIDEr:0.3225522700715415 + +overall score of iter 21328: 0.5769042868807126 + +Save model at iter 21328 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 21413 (epoch 16), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.541), ('loss_ce_0', 0.239), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.637), ('loss_caption', 1.633), ('total_loss', 9.069)]), +time/iter = 0.698, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21546 (epoch 16), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.172), ('loss_self_iou', 0.004), ('cardinality_error', 7.624), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.624), ('loss_caption_0', 1.773), ('loss_caption', 1.784), ('total_loss', 9.621)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21679 (epoch 16), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.238), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.809), ('loss_caption', 1.805), ('total_loss', 9.791)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21812 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.677), ('loss_ce_0', 0.25), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.677), ('loss_caption_0', 1.674), ('loss_caption', 1.676), ('total_loss', 9.277)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21945 (epoch 16), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.192), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.244), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.713), ('loss_caption', 1.714), ('total_loss', 9.531)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22078 (epoch 16), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.707), ('loss_ce_0', 0.247), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.772), ('loss_caption', 1.758), ('total_loss', 9.738)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22211 (epoch 16), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.101), ('loss_bbox', 0.013), ('loss_giou', 0.18), ('loss_self_iou', 0.005), ('cardinality_error', 7.541), ('loss_ce_0', 0.249), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.665), ('loss_caption', 1.66), ('total_loss', 9.243)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22344 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), 
('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 8.008), ('loss_ce_0', 0.248), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.008), ('loss_caption_0', 1.799), ('loss_caption', 1.784), ('total_loss', 9.823)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22477 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.184), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.247), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.722), ('loss_caption', 1.733), ('total_loss', 9.525)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22610 (epoch 16), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 7.729), ('loss_ce_0', 0.245), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.664), ('loss_caption', 1.667), ('total_loss', 9.297)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 22661: +Bleu_1:0.1905629005997804 +Bleu_2:0.11689699082903934 +Bleu_3:0.06544029555928756 +Bleu_4:0.03330988693345351 +METEOR:0.08938496175202132 +ROUGE_L:0.17298359351524648 +CIDEr:0.5732307929342625 +Recall:0.309604513071417 +Precision:0.43046524955715343 +soda_c:0.08056479007503722 +para_Bleu_1:0.3975304274857351 +para_Bleu_2:0.24253918136446623 +para_Bleu_3:0.14848895422464012 +para_Bleu_4:0.09337330751749118 +para_METEOR:0.16677196164785574 +para_ROUGE_L:0.33750187221117683 +para_CIDEr:0.31278894258081524 + +overall score of iter 22661: 0.5729342117461622 + +Save model at iter 22661 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 22743 (epoch 17), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.714), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.714), ('loss_caption_0', 1.773), ('loss_caption', 1.775), ('total_loss', 9.803)]), +time/iter = 0.714, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22876 (epoch 17), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.774), ('loss_ce_0', 0.249), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.76), ('loss_caption', 1.759), ('total_loss', 9.631)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23009 (epoch 17), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.105), ('loss_bbox', 0.012), ('loss_giou', 0.171), ('loss_self_iou', 0.003), ('cardinality_error', 7.872), ('loss_ce_0', 0.237), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.872), ('loss_caption_0', 1.69), ('loss_caption', 1.688), ('total_loss', 9.229)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23142 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.098), ('loss_bbox', 0.013), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.744), ('loss_ce_0', 0.239), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.66), ('loss_caption', 1.663), ('total_loss', 9.173)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23275 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 7.82), ('loss_ce_0', 0.242), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.727), ('loss_caption', 1.741), ('total_loss', 9.535)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23408 (epoch 17), +loss = OrderedDict([('loss_ce', 0.235), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.173), ('loss_self_iou', 0.004), ('cardinality_error', 7.083), ('loss_ce_0', 0.235), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.083), ('loss_caption_0', 1.678), ('loss_caption', 1.68), ('total_loss', 9.181)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23541 (epoch 17), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.185), ('loss_self_iou', 0.003), ('cardinality_error', 7.782), ('loss_ce_0', 0.253), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.686), ('loss_caption', 1.674), ('total_loss', 9.361)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23674 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), 
('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.242), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.734), ('loss_caption', 1.755), ('total_loss', 9.502)]), +time/iter = 0.169, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23807 (epoch 17), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 8.023), ('loss_ce_0', 0.248), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.838), ('loss_caption', 1.842), ('total_loss', 10.01)]), +time/iter = 0.176, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23940 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.246), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.661), ('loss_caption', 1.655), ('total_loss', 9.188)]), +time/iter = 0.168, bad_vid = 0.000 + +Validation results of iter 23994: +Bleu_1:0.19099469488969467 +Bleu_2:0.11646897839764006 +Bleu_3:0.06451308365995856 +Bleu_4:0.032200079484133 +METEOR:0.08912416771202449 +ROUGE_L:0.1730757893125124 +CIDEr:0.5693051160396969 +Recall:0.3097042977992106 +Precision:0.43274547601681085 +soda_c:0.08084297498321232 +para_Bleu_1:0.3924031546442418 +para_Bleu_2:0.23911474626028398 +para_Bleu_3:0.14600811918196227 +para_Bleu_4:0.09107950853175292 +para_METEOR:0.16594454181978452 +para_ROUGE_L:0.33729101832099057 +para_CIDEr:0.30892642009784 + +overall score of iter 23994: 0.5659504704493774 + +Save model at iter 23994 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 24073 (epoch 18), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.11), ('loss_bbox', 0.012), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.97), ('loss_ce_0', 0.246), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.689), ('loss_caption', 1.683), ('total_loss', 9.309)]), +time/iter = 0.720, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24206 (epoch 18), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.118), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 8.286), ('loss_ce_0', 0.236), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.286), ('loss_caption_0', 1.712), ('loss_caption', 1.715), ('total_loss', 9.432)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24339 (epoch 18), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.098), ('loss_bbox', 0.012), ('loss_giou', 0.167), ('loss_self_iou', 0.003), ('cardinality_error', 7.316), ('loss_ce_0', 0.247), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.316), ('loss_caption_0', 1.695), ('loss_caption', 1.701), ('total_loss', 9.257)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24472 (epoch 18), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.003), ('cardinality_error', 7.459), ('loss_ce_0', 0.248), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.459), ('loss_caption_0', 1.699), ('loss_caption', 1.699), ('total_loss', 9.337)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24605 (epoch 18), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.18), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.243), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.775), ('loss_caption', 1.773), ('total_loss', 9.644)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24738 (epoch 18), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.101), ('loss_bbox', 0.016), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.556), ('loss_ce_0', 0.246), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.556), ('loss_caption_0', 1.727), ('loss_caption', 1.73), ('total_loss', 9.525)]), +time/iter = 0.166, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24871 (epoch 18), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.241), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.77), ('loss_caption', 1.773), ('total_loss', 9.641)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25004 (epoch 18), +loss = OrderedDict([('loss_ce', 0.246), 
('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 8.143), ('loss_ce_0', 0.247), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.143), ('loss_caption_0', 1.692), ('loss_caption', 1.684), ('total_loss', 9.379)]), +time/iter = 0.151, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25137 (epoch 18), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.179), ('loss_self_iou', 0.004), ('cardinality_error', 7.88), ('loss_ce_0', 0.245), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.88), ('loss_caption_0', 1.691), ('loss_caption', 1.696), ('total_loss', 9.347)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25270 (epoch 18), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.185), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.238), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.687), ('loss_caption', 1.694), ('total_loss', 9.34)]), +time/iter = 0.146, bad_vid = 0.000 + +Validation results of iter 25327: +Bleu_1:0.19191750615066444 +Bleu_2:0.11783589874301872 +Bleu_3:0.06597231596326529 +Bleu_4:0.03167603834812624 +METEOR:0.08996609888818348 +ROUGE_L:0.1746391859525846 +CIDEr:0.5689023016363987 +Recall:0.31503357525649683 +Precision:0.4376628112951966 +soda_c:0.08097707611185051 +para_Bleu_1:0.3977375551078834 +para_Bleu_2:0.24323062675170298 +para_Bleu_3:0.1488548587270082 +para_Bleu_4:0.09292110149283073 +para_METEOR:0.16716298804356167 +para_ROUGE_L:0.33781551083855066 +para_CIDEr:0.31014493696748857 + +overall score of iter 25327: 0.570229026503881 + +Save model at iter 25327 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 25403 (epoch 19), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.005), ('cardinality_error', 7.429), ('loss_ce_0', 0.248), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.429), ('loss_caption_0', 1.705), ('loss_caption', 1.695), ('total_loss', 9.343)]), +time/iter = 0.723, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25536 (epoch 19), +loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.189), ('loss_self_iou', 0.003), ('cardinality_error', 7.887), ('loss_ce_0', 0.246), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.887), ('loss_caption_0', 1.717), ('loss_caption', 1.729), ('total_loss', 9.517)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25669 (epoch 19), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.707), ('loss_ce_0', 0.243), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.718), ('loss_caption', 1.711), ('total_loss', 9.385)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25802 (epoch 19), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 8.173), ('loss_ce_0', 0.242), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.173), ('loss_caption_0', 1.732), ('loss_caption', 1.735), ('total_loss', 9.515)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25935 (epoch 19), +loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.241), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.626), ('loss_caption', 1.628), ('total_loss', 9.063)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26068 (epoch 19), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.182), ('loss_self_iou', 0.005), ('cardinality_error', 7.444), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.444), ('loss_caption_0', 1.697), ('loss_caption', 1.701), ('total_loss', 9.35)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26201 (epoch 19), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.097), ('loss_bbox', 0.014), ('loss_giou', 0.168), ('loss_self_iou', 0.005), ('cardinality_error', 7.301), ('loss_ce_0', 0.237), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.301), ('loss_caption_0', 1.702), ('loss_caption', 1.703), ('total_loss', 9.254)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26334 (epoch 19), +loss = OrderedDict([('loss_ce', 0.238), 
('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.174), ('loss_self_iou', 0.003), ('cardinality_error', 7.827), ('loss_ce_0', 0.242), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.729), ('loss_caption', 1.725), ('total_loss', 9.424)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26467 (epoch 19), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 8.023), ('loss_ce_0', 0.245), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.751), ('loss_caption', 1.746), ('total_loss', 9.586)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26600 (epoch 19), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.902), ('loss_ce_0', 0.242), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.902), ('loss_caption_0', 1.727), ('loss_caption', 1.737), ('total_loss', 9.533)]), +time/iter = 0.156, bad_vid = 0.000 + +Validation results of iter 26660: +Bleu_1:0.1908811984292725 +Bleu_2:0.11664270449592412 +Bleu_3:0.06546844271584715 +Bleu_4:0.03266470081303028 +METEOR:0.08981101020496235 +ROUGE_L:0.17382953846907112 +CIDEr:0.5716745559959934 +Recall:0.31292035599338697 +Precision:0.4345220728699943 +soda_c:0.08127095018359767 +para_Bleu_1:0.40170065588267356 +para_Bleu_2:0.2447870245859959 +para_Bleu_3:0.14990588787772124 +para_Bleu_4:0.09419227635900729 +para_METEOR:0.16780671784283924 +para_ROUGE_L:0.33845945539662686 +para_CIDEr:0.3198675630646056 + +overall score of iter 26660: 0.5818665572664521 + +Save model at iter 26660 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. 
+Save info to info.json +Best epoch: 11 + +Best Model Performance: +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 +avg_proposal_number:-1 + +Best Overall Score epoch11: 1.5265537286258848 + diff --git a/yc2_univl/val.log b/yc2_univl/val.log new file mode 100644 index 0000000000000000000000000000000000000000..76f83c2963d4b440439af55ee7506b115beba8c3 --- /dev/null +++ b/yc2_univl/val.log @@ -0,0 +1,21 @@ +Best Model Performance: +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 +avg_proposal_number:-1 + +Best Overall Score epoch11: 1.5265537286258848